## Import

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

## Read

In [None]:
# Load the IPL 2017 advertising report from the relative input directory.
ipl_data = pd.read_csv(
    filepath_or_buffer='../input/ipl-2017/IPL 2017 - Adv Report v1.csv',
)
# Preview the first five rows.
ipl_data.head(n=5)

In [None]:
# Drop the first column by position and keep every remaining column.
# NOTE: the slice is positional, so re-running this cell removes one more
# column each time it is executed.
ipl_data = ipl_data.iloc[:,1:]
ipl_data.head()

In [None]:
# (number of rows, number of columns) of the current frame.
ipl_data.shape

## NA?

In [None]:
# Per column: does it contain at least one missing value?
ipl_data.isna().any(axis="index")

In [None]:
# Per row: does it contain at least one missing value?
ipl_data.isna().any(axis="columns")

In [None]:
# Show only the rows that have a missing value in any column.
ipl_data.loc[ipl_data.isna().any(axis="columns")]

## Replace

In [None]:
# Strip the comma separators from the 'Cost' strings (literal match, not a
# regular expression) so the column can be cast to a number afterwards.
ipl_data['Cost'] = ipl_data['Cost'].str.replace(',', "", regex=False)
ipl_data.head(n=5)

In [None]:
# Strip comma separators and convert in one step per column:
# impressions become float64, the 10-second rate becomes an integer.
ipl_data["Impressions´000 {Av(Wg)}"] = (
    ipl_data["Impressions´000 {Av(Wg)}"].str.replace(',', "").astype('float64')
)
ipl_data['Rate/10 Sec'] = (
    ipl_data['Rate/10 Sec'].str.replace(',', "").astype(int)
)
ipl_data.head()

## Fill NaN in Cost

In [None]:
# Cast to float rather than int for now (presumably because the column still
# holds missing values, which the next cells fill before the integer cast).
ipl_data['Cost'] = ipl_data['Cost'].astype('float64')

In [None]:
# Mean Cost over the rows of a single advertiser.
ipl_data.loc[ipl_data["Advertiser"] == "AMITY EDUCATION GROUP", "Cost"].mean()

In [None]:
# Remember which rows lack a Cost so they can be inspected after filling.
missing_cost = ipl_data["Cost"].isna()

# Impute each missing Cost with the mean Cost of that row's own advertiser,
# rather than applying one hard-coded advertiser's mean to every gap.
advertiser_mean_cost = ipl_data.groupby("Advertiser")["Cost"].transform("mean")
ipl_data["Cost"] = ipl_data["Cost"].fillna(advertiser_mean_cost)

# Fallback for rows whose advertiser has no known Cost at all: use the overall
# mean so that no NaN survives (a later integer cast would fail on NaN).
ipl_data["Cost"] = ipl_data["Cost"].fillna(ipl_data["Cost"].mean())

# Show the rows that were just filled, instead of a hard-coded row position.
ipl_data[missing_cost]

In [None]:
# With the gaps filled, store Cost as an integer; the float-to-int cast drops
# any fractional part.
ipl_data['Cost'] = ipl_data['Cost'].astype(dtype=int)

## Change column names

In [None]:
# Give the impressions column a short name. The header list is edited by
# position, so a missing header raises ValueError instead of passing silently.
names = list(ipl_data.columns)
position = names.index('Impressions´000 {Av(Wg)}')
names[position] = 'Impressions'
ipl_data.columns = names
ipl_data.head(n=5)

## Group-by Channel

In [None]:
# Mean impressions per channel.
Channel_data = ipl_data.groupby("Channel")["Impressions"].mean()
Channel_data

In [None]:
# Pie chart of the mean impressions per channel.
# The wedge labels are taken from the grouped index so that every label always
# matches its value; a hand-typed label list can silently disagree with the
# order in which groupby returns the channels.
labels = tuple(Channel_data.index)
# One radial offset per wedge; this assumes exactly three channels.
explode = (0.2, 0.4, 0.1)
plt.pie(Channel_data, labels=labels, explode=explode, shadow=True, startangle=90)
plt.title('Channel-wise Impressions', bbox={'facecolor': '0.8', 'pad': 5})
plt.show()

## Group-by Sector

In [None]:
# Number of rows per sector, largest first.
ipl_grp_sector = ipl_data.groupby(by="Sector")
sector_counts = ipl_grp_sector["Sector"].count()
sector_counts.sort_values(ascending=False)

In [None]:
# Number of rows per month, largest first.
ipl_grp_month = ipl_data.groupby(by="Month")
month_counts = ipl_grp_month["Month"].count()
month_counts.sort_values(ascending=False)

## Sector to Impressions

In [None]:
# Mean impressions per sector, sorted from lowest to highest.
sectorwise_data = (
    ipl_data.groupby('Sector')['Impressions']
    .mean()
    .sort_values(ascending=True)
)
sectorwise_data

In [None]:
# Bar chart of the sector means on a wide figure.
plt.figure(figsize=(15, 8))
sectorwise_data.plot.bar()

## Brand to Impression

In [None]:
# Mean impressions per brand; plot the five highest as horizontal bars,
# re-sorted ascending so the largest bar ends up at the top.
brandwise_data = ipl_data.groupby('Brand')['Impressions'].mean()
top_brands = brandwise_data.sort_values(ascending=False).head(5)
top_brands.sort_values().plot.barh()

## Advertisement Count

In [None]:
# Count the rows (non-null Advertiser entries) per brand and plot the ten
# most frequent brands as horizontal bars, largest at the top.
ads_per_brand = ipl_data.groupby('Brand')['Advertiser'].count()
most_advertised = ads_per_brand.sort_values(ascending=False).head(10)
most_advertised.sort_values().plot.barh()

In [None]:
# Parse the start time once, store it back, derive the hour of day from it,
# and average the impressions for each hour.
start_time = pd.to_datetime(ipl_data['Start Time {Av(Tm)}'])
ipl_data['Start Time {Av(Tm)}'] = start_time
ipl_data['hour'] = start_time.dt.hour
hourwise_data = ipl_data.groupby('hour')['Impressions'].mean()

In [None]:
# Line chart of the mean impressions for each hour of the day.
plt.figure(figsize=(10, 8))
plt.plot(hourwise_data)
plt.ylabel(ylabel=' Average Impressions')
plt.xlabel(xlabel='Hour of the Day in 24 Hr Format')

## Sector-wise Impressions Across Channels

In [None]:
# Median impressions for every (channel, sector) pair: channels as rows,
# sectors as columns.
# The aggregation is named by string: passing the NumPy callable np.median is
# deprecated in recent pandas and emits a FutureWarning, while 'median'
# selects the same built-in aggregation.
impressions_by_sector_across_channels_table = pd.pivot_table(
    ipl_data,
    values='Impressions',
    index=['Channel'],
    columns=['Sector'],
    aggfunc='median',
)
# Display the table rounded to two decimals (the stored table is not rounded).
impressions_by_sector_across_channels_table.round(2)

In [None]:
# Grouped bar chart of the pivoted impressions for five selected sectors,
# one group of bars per channel.
selected_sectors = [
    "Alcoholic Drinks",
    "Banking/Finance/Investment",
    "Telecom/Internet Service Providers",
    "Food & Beverages",
    "Auto",
]
sector_axes = impressions_by_sector_across_channels_table[selected_sectors].plot(
    kind="bar",
    title='Sector-Wise Impressions Across Channel',
)
# Enlarge the figure that owns the axes.
ipl_tel_grp_p = sector_axes.get_figure()
ipl_tel_grp_p.set_size_inches(22, 12)

## Telecom Sector

In [None]:
# Keep only the rows whose sector is exactly 'Telecom Products'.
ipl_tel = ipl_data.loc[ipl_data['Sector'] == 'Telecom Products']
ipl_tel

## Advertiser and Impressions

In [None]:
# Group the telecom rows by advertiser and chart each advertiser's mean
# impressions, highest first.
ipl_tel_grp = ipl_tel.groupby(by='Advertiser')
ipl_tel_grp_n = (
    ipl_tel_grp["Impressions"]
    .mean()
    .sort_values(ascending=False)
)
ipl_tel_grp_n.plot.bar()

## Company Spends

In [None]:
# Mean Cost and mean Rate/10 Sec per telecom advertiser, ordered by Cost and
# drawn as grouped bars on an enlarged figure.
mean_spend = ipl_tel_grp[["Cost", "Rate/10 Sec"]].mean().sort_values(by="Cost")
spend_axes = mean_spend.plot(kind="bar", title='Cost & Rate/10 Sec')
ipl_tel_grp_p = spend_axes.get_figure()
ipl_tel_grp_p.set_size_inches(22, 12)

## Advertisement Length

In [None]:
# Ad-length statistics per telecom advertiser. The first column keeps the
# original header and holds the mean; rows are ordered by that mean.
length_grp = ipl_tel_grp[["Length [sec] {Av}"]]
ipl_tel_grp_len = length_grp.mean().sort_values(by="Length [sec] {Av}")
# Each further statistic is added as its own column, aligned on the advertiser.
for stat_column, statistic in (
    ('Length Median', 'median'),
    ('Length Max', 'max'),
    ('Length Min', 'min'),
    ('Length Sum', 'sum'),
):
    ipl_tel_grp_len[stat_column] = length_grp.agg(statistic)
ipl_tel_grp_len

In [None]:
# Total ad length per advertiser, smallest first.
ipl_tel_grp_len['Length Sum'].sort_values(ascending=True)

## Motorola vs. Reliance Jio (Ad Length)

In [None]:
# Telecom rows belonging to either of the two advertisers being compared.
ipl_mot_rel = ipl_tel[
    ipl_tel["Advertiser"].isin(["MOTOROLA", "RELIANCE JIO INFOCOMM LTD"])
]

In [None]:
# Box plot of ad length, one box per advertiser.
sns.boxplot(
    data=ipl_mot_rel,
    x="Advertiser",
    y="Length [sec] {Av}",
    palette="Set2",
)

## Cost to Impressions

In [None]:
# Bucket the telecom rows by Cost and chart the mean impressions per bucket.
cost_bins = [40000, 200000, 400000, 600000, 800000, 1000000]
fig, ax = plt.subplots()
fig.set_size_inches(w=11.7, h=8.27)
# Rows whose Cost falls outside the bin edges get no bucket.
out = pd.cut(ipl_tel["Cost"], bins=cost_bins)
sns.barplot(data=ipl_tel, x=out, y="Impressions", ax=ax)

## Length to Impressions

In [None]:
#Is length related to impressions?
ipl_tel = ipl_data[(ipl_data['Sector'] == 'Telecom Products')]
fig, ax = plt.subplots()
fig.set_size_inches(11.7, 8.27)
out = pd.cut(ipl_tel["Length [sec] {Av}"], bins = [2, 5, 15, 25, 35, 45])
sns.barplot(x = out, y = "Impressions", data = ipl_tel)

## Impression and Cost for the 4th month

In [None]:
# Month 4 telecom rows: Cost (left) and Impressions (right) per advertiser.
ipl_m4 = ipl_tel.loc[ipl_tel['Month'] == 4]
plt.figure(figsize=(20, 8))
plt.subplots_adjust(wspace=0.1)
for panel, metric in enumerate(("Cost", "Impressions"), start=1):
    plt.subplot(1, 2, panel)
    sns.barplot(x="Advertiser", y=metric, data=ipl_m4)
    # Rotate the advertiser names so they stay readable.
    plt.xticks(rotation=90)
plt.show()

## Impression and Cost for the 5th month

In [None]:
# Month 5 telecom rows: Cost (left) and Impressions (right) per advertiser.
# NOTE: the variable keeps the name `ipl_m4` although it now holds the
# month-5 rows.
ipl_m4 = ipl_tel.loc[ipl_tel['Month'] == 5]
plt.figure(figsize=(20, 8))
for panel, metric in enumerate(("Cost", "Impressions"), start=1):
    plt.subplot(1, 2, panel)
    sns.barplot(x="Advertiser", y=metric, data=ipl_m4)
    # Rotate the advertiser names so they stay readable.
    plt.xticks(rotation=90)
plt.show()
# Reliance came in the 5th month

In [None]:
def tel_ad(adv):
    """Count how often each ad description occurs for one telecom advertiser.

    Args:
        adv: Advertiser name, matched exactly against ``ipl_tel['Advertiser']``.

    Returns:
        The ``value_counts()`` of the 'Description' column over the matching
        rows of the module-level ``ipl_tel`` frame.
    """
    is_advertiser = ipl_tel['Advertiser'] == adv
    return ipl_tel.loc[is_advertiser, 'Description'].value_counts()

In [None]:
# Print the per-description ad counts for one advertiser.
apple_ads = tel_ad("APPLE COMPUTER INDIA")
print(apple_ads)

In [None]:
# Pie chart of how often each ad description occurs for this advertiser.
# The wedge labels come from the counts' own index, so each label always
# belongs to its value; value_counts orders by frequency, which a hand-typed
# label list cannot be relied on to match.
apple_ad_counts = tel_ad("APPLE COMPUTER INDIA")
labels = list(apple_ad_counts.index)
plt.pie(apple_ad_counts, labels = labels, startangle=90)
plt.show()