### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [2]:
#remove duplicate names in SN
totalplayers = purchase_data['SN'].drop_duplicates()
#count updated total players
totalplayers.columns =['Total']
#present in dataframe
PTP = pd.DataFrame(totalplayers.count(), columns = ["Total Players"], index = [""])
PTP

Unnamed: 0,Total Players
,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [6]:
#find all required data for unique items, avg price, #purchases, and total revenue
uitems = purchase_data['Item ID'].drop_duplicates().count() 
avgprice = purchase_data['Price'].mean()
numpur = purchase_data['Item ID'].count()
totalrev = purchase_data['Price'].sum()
#add all required data to list
purlist = uitems, avgprice, numpur, totalrev
#create data frame
pa = pd.DataFrame([purlist], columns = ['Number of Unique Items', 'Average Price', 'Number of Purchases', 
                                        'Total Revenue'],index = [""])
#formatting proper decimal spacing and currency
pd.options.display.float_format = '${:.2f}'.format

#display data frame 
pa

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
,179,$3.05,780,$2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [7]:
#countof male players from purchase_data
maledf = purchase_data[purchase_data.Gender == "Male"]
male = maledf['SN'].drop_duplicates().count()

#count of female players
femaledf = purchase_data[purchase_data.Gender == "Female"]
female = femaledf['SN'].drop_duplicates().count()

#count of Other players
otherdf = purchase_data['SN'].drop_duplicates().count()
other = otherdf - (male + female)

#getting Total count of players
total = male + female + other

#getting Averages for each section
maleavg = (male / total)*100
femaleavg = (female / total)*100
otheravg = (other / total)*100

#gender lists
gendercnt = male , female , other
genderavg = maleavg , femaleavg , otheravg

#creating data frame
genderdf = pd.DataFrame({'Total Count':[male, female, other],
    'Percentage of Players':[maleavg, femaleavg, otheravg] },index = ['Male', 'Female', 'Other'])

#format percentages
pd.options.display.float_format = '%{:.2f}'.format
genderdf


Unnamed: 0,Total Count,Percentage of Players
Male,484,%84.03
Female,81,%14.06
Other,11,%1.91



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [3]:
#male purchase count, avg purchase price, avg purchase total per person
malepurdf = purchase_data.loc[purchase_data['Gender'] == 'Male',:]
malepurcount = malepurdf['Item ID'].count()
malepuravg = malepurdf['Price'].mean()
malepurtotal = malepurdf['Price'].sum()
maleavgpp = malepuravg

#Female purchase count, avg purchase price, avg purchase total per person
femalepurdf = purchase_data.loc[purchase_data['Gender'] == 'Female',:]
femalepurcount = femalepurdf['Item ID'].count()
femalepuravg = femalepurdf['Price'].mean()
femalepurtotal = femalepurdf['Price'].sum()
femaleavgpp = femalepurtotal / femalepurcount

#Other purchase count, avg purchase price, avg purchase total per person 
totalpurcount = purchase_data['Item ID'].count()
otherpurcount = totalpurcount - (malepurcount + femalepurcount)
otherpurdf = purchase_data.loc[(purchase_data['Gender'] != 'Male') & (purchase_data['Gender'] !='Female')]
otherpuravg = otherpurdf['Price'].mean()
otherpurtotal = otherpurdf['Price'].sum()
otheravgpp = otherpurtotal / otherpurcount

 
#I need to clarify what average total purchase per person means!!!!!    

purdf = pd.DataFrame({
    'Purchase Count':[malepurcount, femalepurcount, otherpurcount], 
    'Average Purchase Price':[malepuravg, femalepuravg, otherpuravg],
    'Total Purchase Value': [malepurtotal, femalepurtotal, otherpurtotal], 
    'Average Total Purchase Per Person': [maleavgpp, femaleavgpp, otheravgpp] },
    index = ['Male', 'Female', 'Other'])
pd.options.display.float_format = '${:.2f}'.format
purdf   

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase Per Person
Male,652,$3.02,$1967.64,$3.02
Female,113,$3.20,$361.94,$3.20
Other,15,$3.35,$50.19,$3.35


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [4]:
#raw data for age count  

age10 = purchase_data.loc[(purchase_data['Age']<10)]
age10count = age10['SN'].drop_duplicates().count()
age1014 = purchase_data.loc[(purchase_data['Age']>9) & (purchase_data['Age']<15)]
age1014count = age1014['SN'].drop_duplicates().count() 
age1519 = purchase_data.loc[(purchase_data['Age']>14) & (purchase_data['Age']<20)]
age1519count = age1519['SN'].drop_duplicates().count()
age2024 = purchase_data.loc[(purchase_data['Age']>19) & (purchase_data['Age']<25)]
age2024count = age2024['SN'].drop_duplicates().count()
age2529 = purchase_data.loc[(purchase_data['Age']>24) & (purchase_data['Age']<30)]
age2529count = age2529['SN'].drop_duplicates().count()
age3034 = purchase_data.loc[(purchase_data['Age']>29) & (purchase_data['Age']<35)]
age3034count = age3034['SN'].drop_duplicates().count()
age3539 = purchase_data.loc[(purchase_data['Age']>34) & (purchase_data['Age']<40)]
age3539count = age3539['SN'].drop_duplicates().count()
age40 = purchase_data.loc[(purchase_data['Age']>39)]
age40count = age40['SN'].drop_duplicates().count()

age10 = age10['Age'].count()
age1014 = age1014['Age'].count()
age1519 = age1519['Age'].count()
age2024 = age2024['Age'].count()
age2529 = age2529['Age'].count()
age3034 = age3034['Age'].count()
age3539 = age3539['Age'].count()
age40 = age40['Age'].count()

#data for percentages
agetotal = purchase_data['SN'].drop_duplicates().count()
age10per = (age10count/agetotal)*100
age1014per = (age1014count/agetotal)*100
age1519per = (age1519count/agetotal)*100
age2024per = (age2024count/agetotal)*100
age2529per = (age2529count/agetotal)*100
age3034per = (age3034count/agetotal)*100
age3539per = (age3539count/agetotal)*100
age40per = (age40count/agetotal)*100

#creating bins 
bins = [0,9,14,19,24,29,34,39,100]
binnames = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']
pd.cut(purchase_data["Age"], bins, labels=binnames, include_lowest=True).head()

df2 = pd.DataFrame({
    'Total Count': [age10count, age1014count, age1519count,age2024count,
                    age2529count,age3034count,age3539count,age40count],
    'Percentage of Players':[age10per, age1014per, age1519per, age2024per,
                            age2529per, age3034per, age3539per, age40per]
},
index = [binnames])

#set Formating for percentages
pd.options.display.float_format = '{:.2f}%'.format

df2

Unnamed: 0,Total Count,Percentage of Players
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [6]:
age10a = purchase_data.loc[(purchase_data['Age']<10)]
age10sum = age10['SN'].drop_duplicates().sum()
age1014a = purchase_data.loc[(purchase_data['Age']>9) & (purchase_data['Age']<15)]
age1014sum = age1014['SN'].drop_duplicates().sum() 
age1519a = purchase_data.loc[(purchase_data['Age']>14) & (purchase_data['Age']<20)]
age1519sum = age1519['SN'].drop_duplicates().sum()
age2024a = purchase_data.loc[(purchase_data['Age']>19) & (purchase_data['Age']<25)]
age2024sum = age2024['SN'].drop_duplicates().sum()
age2529a = purchase_data.loc[(purchase_data['Age']>24) & (purchase_data['Age']<30)]
age2529sum = age2529['SN'].drop_duplicates().sum()
age3034a = purchase_data.loc[(purchase_data['Age']>29) & (purchase_data['Age']<35)]
age3034sum = age3034['SN'].drop_duplicates().sum()
age3539a = purchase_data.loc[(purchase_data['Age']>34) & (purchase_data['Age']<40)]
age3539sum = age3539['SN'].drop_duplicates().sum()
age40a = purchase_data.loc[(purchase_data['Age']>39)]
age40sum = age40['SN'].drop_duplicates().sum()


agebins = [0,9,14,19,24,29,34,39,100]
agebinnames = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']
test = pd.cut(purchase_data["Age"], agebins, labels=agebinnames, include_lowest=True)

agecount1 = purchase_data.loc[(purchase_data['Age']<10)]
agecountpur = agecount1['Price'].sum()


                               

agepurdf = pd.DataFrame = ({
    'Purchase Count': [age10sum,age1014sum,age1519sum,age2024sum,age2529sum,age3034sum,age3539sum,age40sum]
})







test

TypeError: isinstance() arg 2 must be a type or tuple of types

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80


Unnamed: 0,Class,Name,Test Score
0,Oct,Cyndy,90
1,Oct,Logan,59
2,Jan,Laci,72
3,Jan,Elmer,88
4,Oct,Crystle,98
5,Jan,Emmie,60


Unnamed: 0,Class,Name,Test Score,Test Score Summary
0,Oct,Cyndy,90,A
1,Oct,Logan,59,F
2,Jan,Laci,72,C
3,Jan,Elmer,88,B
4,Oct,Crystle,98,A
5,Jan,Emmie,60,D


4.99 1.0


0.002096841291385302