### Loading the data into a pandas DataFrame

In [1]:
import pandas as pd

df = pd.read_csv('../Data/purchase_history.csv')

# Terminate every id string with "," so that, when the strings of several rows
# are concatenated later, the last id of one row stays separated from the
# first id of the next row.
df['id'] = [ids + ',' for ids in df['id']]
df.head()

Unnamed: 0,user_id,id
0,222087,2726
1,1343649,64717
2,404134,1812232227433820351
3,1110200,923220264737
4,224107,"31,18,5,13,1,21,48,16,26,2,44,32,20,37,42,35,4..."


## Grouping the products purchased by each customer using groupby

In [2]:
# Summing string values concatenates them, so each user ends up with a single
# comma-terminated string holding the ids from all of that user's rows.
grouped_df = (
    df.groupby('user_id')['id']
      .sum()
      .reset_index()
)

###### Before grouping for a sample user_id : 223

In [3]:
# Rows of the ungrouped frame that belong to the sample user.
df.loc[df['user_id'].eq(223)]

Unnamed: 0,user_id,id
10297,223,163529414532
23045,223,1722238


###### After grouping for a sample user_id : 223

In [4]:
# The same sample user after grouping: a single row remains.
grouped_df.loc[grouped_df['user_id'].eq(223)]

Unnamed: 0,user_id,id
4,223,1635294145321722238


#### Removing the trailing comma (,) and converting each string into a list of integers

In [5]:
# Drop the final "," (last character), split on the remaining commas and cast
# every piece to int, turning each joined string into a list of item ids.
grouped_df['id'] = grouped_df['id'].map(
    lambda joined: [int(token) for token in joined[:-1].split(',')]
)
grouped_df.head()

Unnamed: 0,user_id,id
0,47,"[3, 4, 2, 13, 44, 16, 24, 28, 23, 45, 15, 46, 25]"
1,68,"[23, 10, 42, 39, 19, 6, 17, 25]"
2,113,"[46, 33, 30, 32, 43, 9, 38, 21, 15, 3, 25]"
3,123,"[4, 10, 27, 18]"
4,223,"[1, 6, 35, 29, 41, 45, 32, 17, 2, 22, 38]"


## Creating a frequency distribution of the items purchased by each individual customer

In [6]:
# Item ids run from 1 to N_ITEMS; one count column (item_1 ... item_N) is
# produced per item. Ids outside this range are ignored, as before.
N_ITEMS = 48
item_ids = range(1, N_ITEMS + 1)
columns = ['id'] + ['item_' + str(i) for i in item_ids]

# Build all rows first and construct the DataFrame once.
# DataFrame.append was removed in pandas 2.0, and appending row by row copies
# the whole frame on every iteration (quadratic in the number of users).
# Each row is: [user_id, count of item 1, count of item 2, ..., count of item N].
rows = [
    [user_id] + [items_list.count(number) for number in item_ids]
    for user_id, items_list in zip(grouped_df['user_id'], grouped_df['id'])
]

# Passing `columns` keeps the column order and also yields a correctly shaped
# empty frame when there are no users.
purchase_df = pd.DataFrame(rows, columns=columns)

purchase_df.tail()

Unnamed: 0,id,item_1,item_2,item_3,item_4,item_5,item_6,item_7,item_8,item_9,...,item_39,item_40,item_41,item_42,item_43,item_44,item_45,item_46,item_47,item_48
24880,1499730,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
24881,1499778,0,1,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,1
24882,1499797,0,0,1,0,0,1,0,0,0,...,1,0,0,0,0,1,1,0,0,1
24883,1499817,1,2,0,0,0,1,0,1,1,...,0,0,0,2,1,2,1,0,1,1
24884,1499974,0,0,0,0,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,0


#### Saving the prepared purchase history data to a csv file

In [7]:
# Write the per-user item counts to disk; the row index is not stored.
purchase_df.to_csv(
    '../Data/prepared_purchased_history.csv',
    index=False,
)

- purchase_history.csv is grouped by user_id, converted into a per-item frequency distribution (FreqDist) as required, and saved to prepared_purchased_history.csv
- prepared_purchased_history.csv is then loaded in Challenge1.ipynb, where Task1 is accomplished