# Brand Analysis

In [None]:
import pandas as pd

# Read the raw event log from '2019-Oct.csv' into the notebook-wide frame `df`;
# every later cell filters or extends this frame.
df = pd.read_csv('2019-Oct.csv')

# Print the first five rows as a quick check of the loaded columns and values.
print(df.head(n=5))

event_time: The exact time at which the user performed the activity

event_type: The type of activity that occurred; there are three types in our case, i.e., view, cart, and purchase

product_id: The unique ID of a particular product

category_id: The unique ID of the category to which the product belongs

category_code: The unique category code to which the product belongs

brand: The brand name of the selected product

price: The price of the selected product

user_id: The unique ID of the user

user_session: The unique ID generated every time a user visits the site. It is different for every visit of a particular user

In [None]:
# Split all events by whether the product carries a brand name.
df_with_brand = df[df['brand'].notna()]

df_without_brand = df[df['brand'].isna()]

# purchases with brand
# The activity column is named 'event_type' (underscore), matching the data
# description; 'event-type' does not exist and raises a KeyError.
df_with_brand = df_with_brand[df_with_brand['event_type']=='purchase']
print(df_with_brand.head())

# purchases without brand
# The boolean mask must be built from df_without_brand itself: a mask taken
# from df_with_brand has a different index and cannot select these rows.
df_without_brand = df_without_brand[df_without_brand['event_type']=='purchase']
print(df_without_brand.head())

In [None]:
# All purchase events, branded or not. Deriving both counts from this one frame
# keeps the percentages correct regardless of how earlier cells filtered
# df_with_brand / df_without_brand.
purchase_events = df[df['event_type']=='purchase']

# number of purchase events in total (row count, not the frame itself)
original_length = len(purchase_events)

# number of purchases with brand
with_brand_length = int(purchase_events['brand'].notna().sum())
# str() is required: concatenating a str with a float raises TypeError.
print("Percentage of purchases with brand: " + str((with_brand_length/original_length)*100))

# number of purchases without brand
without_brand_length = int(purchase_events['brand'].isna().sum())
print("Percentage of purchases without brand: " + str((without_brand_length/original_length)*100))

### The hypothesis
A hypothesis can be drawn based on the above results.

For marketers, most of the marketing budget should be allotted to the advertisement of branded products.

For inventors or entrepreneurs, always introduce the product with a brand name because products without a brand have a very low probability of getting bought.

In [None]:
# Convert the event_time column to datetime. Re-running this cell is safe:
# pd.to_datetime leaves an already-converted column unchanged.
df['event_time'] = pd.to_datetime(df['event_time'])

# Compute and append the week_day, day (of month) and hour features.
# Datetime components of a Series are only reachable through the `.dt`
# accessor; reading them directly off the Series raises AttributeError.
df['week_day'] = df['event_time'].dt.dayofweek  # 0 = Monday ... 6 = Sunday
df['day'] = df['event_time'].dt.day
df['hour'] = df['event_time'].dt.hour

print(df.head())

### Weekly Analysis
In this part, we will review a weekly analysis of the number of views. This will reveal the day of the week on which the most or least number of views occur for the website.

In [None]:
import matplotlib.pyplot as plt

In [None]:
# get all view events of all users
view_events = df[df['event_type']=='view']

# Count views per weekday and order them Monday (0) .. Sunday (6).
# value_counts() orders by frequency, so reindex to put the x axis in
# calendar order; fill_value=0 keeps a weekday that has no views on the axis.
views_per_week_day = view_events['week_day'].value_counts().reindex(range(7), fill_value=0)

# plot of the number of views against all the days of the week
view_plot = views_per_week_day.plot(kind='line',figsize=(15,6))

# set properties of the plot (the matplotlib keyword is `fontsize`)
view_plot.set_xlabel('Days of the week',fontsize=15)
view_plot.set_ylabel('Number of Views',fontsize=15)
view_plot.set_title('Number of views per day of the week',fontsize=15)
# Pin one tick per weekday before labelling so every label lines up with its day.
view_plot.set_xticks(list(range(7)))
view_plot.set_xticklabels(['Mon','Tue','Wed','Thur','Fri', 'Sat','Sun'],rotation='horizontal', fontsize=15)

# show the plot
plt.show()

In [None]:
# get all view events of all users
view_events = df[df['event_type']=='view']

# Count views per hour and order them 0 .. 23.
# value_counts() orders by frequency, so reindex to put the x axis in
# clock order; fill_value=0 keeps an hour that has no views on the axis.
hours_of_day = list(range(24))
views_per_hour = view_events['hour'].value_counts().reindex(hours_of_day, fill_value=0)

# plot of the number of views against all the hours of the day
view_plot = views_per_hour.plot(kind='line',figsize=(15,6))

# set properties of the plot (the matplotlib keyword is `fontsize`)
view_plot.set_xlabel('Hour of the day',fontsize=15)
view_plot.set_ylabel('Number of Views',fontsize=15)
view_plot.set_title('Number of views per hour of the day',fontsize=15)
# One tick per hour, 0 through 23 (24 labels); built with range() so the cell
# does not depend on numpy, which this notebook never imports.
view_plot.set_xticks(hours_of_day)
view_plot.set_xticklabels([str(hour) for hour in hours_of_day],rotation='horizontal', fontsize=15)

# show the plot
plt.show()

### The hypothesis

In [None]:
### Select all purchase events
# The activity column is 'event_type'; there is no 'event' column.
purchase_events_df = df[df['event_type']=='purchase']

### Group data on brands (rows with a missing brand are dropped by groupby)
brands_group = purchase_events_df.groupby('brand')

### Number of products bought in each brand
# size() counts the rows in each group; 'len' is not a valid aggregation name.
# The count is kept in a column called 'len', most-purchased brand first.
top_brands = brands_group.size().to_frame('len').sort_values('len', ascending=False)

print(top_brands)

In [None]:
### Select all purchase events
# NOTE(review): this cell repeats the brand ranking computed in the cell
# directly above it; consider removing one of the two.
# The activity column is 'event_type'; there is no 'event' column.
purchase_events_df = df[df['event_type']=='purchase']

### Group data on brands (rows with a missing brand are dropped by groupby)
brands_group = purchase_events_df.groupby('brand')

### Number of products bought in each brand
# size() counts the rows in each group; 'len' is not a valid aggregation name.
# The count is kept in a column called 'len', most-purchased brand first.
top_brands = brands_group.size().to_frame('len').sort_values('len', ascending=False)

print(top_brands)