
### Loading dataset & Customer Demographics

In [23]:
import numpy as np
# Read the customer CSV into a structured array with one named field per column.
d = np.genfromtxt(
    'Mall_Customers.csv',
    delimiter=',',
    skip_header=1,
    dtype=[('id', int), ('gender', 'U6'), ('age', int), ('income', int), ('score', int)],
)

# Flatten the structured array into a plain 2-D table.
# Stacking int and str columns together makes every cell a string, which is
# why numeric columns are cast back with .astype(int) wherever they are used.
data = np.column_stack([d[field] for field in ('id', 'gender', 'age', 'income', 'score')])

# Overall summary statistics for the three numeric columns (age, income, score).
numeric_part = data[:, 2:].astype(int)
print("mean[age, income, score] = ", numeric_part.mean(axis=0))
print("min[age, income, score] = ", numeric_part.min(axis=0))
print("max[age, income, score] = ", numeric_part.max(axis=0))

# Row masks per gender and the numeric columns compared between the two groups.
male_rows = data[:, 1] == 'Male'
female_rows = data[:, 1] == 'Female'
income_col = data[:, 3].astype(int)
score_col = data[:, -1].astype(int)

# Average income by gender.
avg_male_income = income_col[male_rows].mean().round(2)
avg_female_income = income_col[female_rows].mean().round(2)

print("Male average income=", avg_male_income)
print("Female average income=", avg_female_income)
if avg_male_income > avg_female_income:
    print("Male earns MORE than female")
else:
    print("Female earns MORE than male")

# Average spending score by gender.
avg_male_spending_score = score_col[male_rows].mean().round(2)
avg_female_spending_score = score_col[female_rows].mean().round(2)
print("Male average spending score=", avg_male_spending_score)
print("Female average spending score=", avg_female_spending_score)
if avg_male_spending_score > avg_female_spending_score:
    print("Male spends more than female")
else:
    print("Female spends more than male")



mean[age, income, score] =  [38.85 60.56 50.2 ]
min[age, income, score] =  [18 15  1]
max[age, income, score] =  [ 70 137  99]
Male average income= 62.23
Female average income= 59.25
Male earns MORE than female
Male average spending score= 48.51
Female average spending score= 51.53
Female spends more than male


### Identify high value customers

In [24]:
# Column layout of `data`: 0=id, 1=gender, 2=age, 3=income, 4=score.
# `data` holds strings (int and str columns were stacked together when it was
# built), so numeric columns must be cast to int before comparing or sorting.
#
# The score is addressed as column 4 rather than -1: the segmentation cell
# appends a label column to `data`, after which -1 no longer points at the score.
spending_scores = data[:, 4].astype(int)

# Average income of customers whose spending score exceeds 80.
avg_income_score80 = np.mean(data[spending_scores > 80][:, 3].astype(int)).round(2)
print("Avg income of customers whose Spending score > 80 = ",avg_income_score80)


# Rank customers by NUMERIC score. Sorting the raw string column would order
# lexicographically (e.g. '9' would rank above '89'). Negating the scores and
# using a stable sort yields descending order with ties kept in file order,
# so the selection is deterministic.
top10_order = np.argsort(-spending_scores, kind='stable')[:10]
top10 = data[top10_order]

# Gender split among the ten highest-scoring customers.
top10_male = np.count_nonzero(top10[:, 1] == 'Male')
top10_female = np.count_nonzero(top10[:, 1] == 'Female')
print("Number of male in top 10 is ",top10_male ," and female is ",top10_female)
print("Males are more in top10" if top10_male > top10_female 
      else "Males and females are equal in top10" if top10_female == top10_male else "Females are more in top10" )

# Age profile of the ten highest-scoring customers.
top10_ages = top10[:, 2].astype(int)
print("Min age for top10 is ", np.min(top10_ages),
      "Max age for top10 is ", np.max(top10_ages),
      "Avg age for top10 is ", np.mean(top10_ages))


Avg income of customers whose Spending score > 80 =  69.07
Number of male in top 10 is  5  and female is  5
Males and females are equal in top10
Min age for top10 is  18 Max age for top10 is  40 Avg age for top10 is  30.5


### Explore Relationships between features

In [25]:
# Cast the three numeric columns once (2=age, 3=income, 4=score); `data` stores
# them as strings, hence the explicit integer conversion.
age_values, income_values, score_values = (
    data[:, column].astype(int) for column in (2, 3, 4)
)

# Each result is the full 2x2 correlation matrix; the off-diagonal entry is
# the Pearson coefficient between the two features.
corr_age_income = np.corrcoef(age_values, income_values)
corr_age_spendingscore = np.corrcoef(age_values, score_values)
corr_income_spendingscore = np.corrcoef(income_values, score_values)

print("Correlation between age and income is \n", corr_age_income)
print("Correlation between age and spending score is \n", corr_age_spendingscore)
print("Correlation between income and spending score is \n", corr_income_spendingscore)



Correlation between age and income is 
 [[ 1.         -0.01239804]
 [-0.01239804  1.        ]]
Correlation between age and spending score is 
 [[ 1.         -0.32722685]
 [-0.32722685  1.        ]]
Correlation between income and spending score is 
 [[1.         0.00990285]
 [0.00990285 1.        ]]


### Customer Segmentation

In [26]:
# Upper bounds (inclusive) of the Low and Medium spending-score bands;
# anything above MEDIUM_SCORE_MAX falls into the 'high' band.
LOW_SCORE_MAX = 33
MEDIUM_SCORE_MAX = 66

# Cast the score column (index 4) once; `data` stores its values as strings.
segment_scores = data[:, 4].astype(int)

# Label every customer by spending-score band. The label strings are kept
# exactly as before (including the lowercase 'high') because they are
# runtime values other code may compare against.
new_column = np.where(segment_scores <= LOW_SCORE_MAX, 'Low',
                      np.where(segment_scores <= MEDIUM_SCORE_MAX, 'Medium', 'high'))

# Rebuild `data` from its first five columns plus the labels, so that
# re-running this cell replaces the label column instead of appending
# another copy on every execution.
data = np.column_stack((data[:, :5], new_column))

# Report average age and income per segment. np.unique returns the labels
# that actually occur, in sorted order.
unique_value = np.unique(data[:,5])
for segment in unique_value:
    group = data[data[:,5] == segment]
    group_mean_age = np.mean(group[:,2].astype(int)).round(2)
    group_mean_income = np.mean(group[:,3].astype(int)).round(2)
    print("Avg age  & income for ",segment, " spending score are ", group_mean_age," & ", group_mean_income, " respectively")



Avg age  & income for  Low  spending score are  42.88  &  67.0  respectively
Avg age  & income for  Medium  spending score are  42.01  &  53.86  respectively
Avg age  & income for  high  spending score are  30.18  &  66.07  respectively
