In [1]:
# Import the modules
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the customer shopping dataset from the Resources folder into a DataFrame
customers_df = pd.read_csv(
    filepath_or_buffer='./Resources/customer-shopping-data.csv',
)

# Preview the first rows to confirm the file loaded as expected
customers_df.head()

Unnamed: 0,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen,Method
0,12669,9656,7561,214,2674,1338,Retail
1,7057,9810,9568,1762,3293,1776,Retail
2,6353,8808,7684,2405,3516,7844,Retail
3,13265,1196,4221,6404,507,1788,HotelRestCafe
4,22615,5410,7198,3915,1777,5185,Retail


In [3]:
# Check the DataFrame data types to tell the numeric columns (scaled in a
# later cell) apart from the text column (encoded in a later cell)
customers_df.dtypes

Fresh                int64
Milk                 int64
Grocery              int64
Frozen               int64
Detergents_Paper     int64
Delicassen           int64
Method              object
dtype: object

In [4]:
# Get the column names so they can be spelled exactly when selecting
# columns in the cells that follow.
customers_df.columns

Index(['Fresh', 'Milk', 'Grocery', 'Frozen', 'Detergents_Paper', 'Delicassen',
       'Method'],
      dtype='object')

In [6]:
# Standardize the six numeric spending columns with StandardScaler; the
# text-valued 'Method' column is deliberately left out of this step
spending_columns = ['Fresh', 'Milk', 'Grocery', 'Frozen', 'Detergents_Paper', 'Delicassen']
customers_scaled = StandardScaler().fit_transform(customers_df[spending_columns])

# Show the first scaled row as a quick sanity check
customers_scaled[0]

array([ 0.05293319,  0.52356777, -0.04111489, -0.58936716, -0.04356873,
       -0.06633906])

In [7]:
# Create a DataFrame with the scaled data.
# The index is copied from customers_df so every scaled row keeps the row
# label of the record it came from. The later pd.concat(..., axis=1) aligns
# on the index, so a fresh 0..n-1 index would silently misalign rows (and
# introduce NaNs) if customers_df were ever filtered or re-indexed upstream.
# With the default index produced by read_csv the result is unchanged.
customers_transformed = pd.DataFrame(
    customers_scaled,
    columns=['Fresh', 'Milk', 'Grocery', 'Frozen', 'Detergents_Paper', 'Delicassen'],
    index=customers_df.index,
)

# Display sample data
customers_transformed.head()

Unnamed: 0,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen
0,0.052933,0.523568,-0.041115,-0.589367,-0.043569,-0.066339
1,-0.391302,0.544458,0.170318,-0.270136,0.086407,0.089151
2,-0.447029,0.408538,-0.028157,-0.137536,0.133232,2.243293
3,0.100111,-0.62402,-0.392977,0.687144,-0.498588,0.093411
4,0.840239,-0.052396,-0.079356,0.173859,-0.231918,1.299347


In [8]:
# One-hot encode the "Method" column with get_dummies(), requesting integer
# (0/1) indicator columns, one per distinct purchase method
purchase_method = pd.get_dummies(
    data=customers_df['Method'],
    dtype=int,
)

# Preview the encoded columns
purchase_method.head()

Unnamed: 0,HotelRestCafe,Retail
0,0,1
1,0,1
2,0,1
3,1,0
4,0,1


In [9]:
# Place the scaled spending columns (customers_transformed) and the one-hot
# encoded purchase-method columns (purchase_method) side by side
customers_transformed_df = pd.concat(
    objs=[customers_transformed, purchase_method],
    axis='columns',
)

# Display the combined DataFrame
customers_transformed_df

Unnamed: 0,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen,HotelRestCafe,Retail
0,0.052933,0.523568,-0.041115,-0.589367,-0.043569,-0.066339,0,1
1,-0.391302,0.544458,0.170318,-0.270136,0.086407,0.089151,0,1
2,-0.447029,0.408538,-0.028157,-0.137536,0.133232,2.243293,0,1
3,0.100111,-0.624020,-0.392977,0.687144,-0.498588,0.093411,1,0
4,0.840239,-0.052396,-0.079356,0.173859,-0.231918,1.299347,0,1
...,...,...,...,...,...,...,...,...
435,1.401312,0.848446,0.850760,2.075222,-0.566831,0.241091,1,0
436,2.155293,-0.592142,-0.757165,0.296561,-0.585519,0.291501,1,0
437,0.200326,1.314671,2.348386,-0.543380,2.511218,0.121456,0,1
438,-0.135384,-0.517536,-0.602514,-0.419441,-0.569770,0.213046,1,0
