# Data Analysis

In [1]:
import pandas as pd

In [3]:
# Toy employee records: Age has one missing value and the last row repeats the first,
# which gives the cleaning cells further down something to work on.
data = {
    "Name": ["John", "Anna", "Peter", "Linda", "John"],
    "Age": [35, 28, None, 32, 35],
    "Salary": [2500, 3500, 4000, 4000, 2500],
}

In [4]:
# Build the working frame: each key of `data` becomes a column.
df = pd.DataFrame(data=data)

In [5]:
df

Unnamed: 0,Name,Age,Salary
0,John,35.0,2500
1,Anna,28.0,3500
2,Peter,,4000
3,Linda,32.0,4000
4,John,35.0,2500


In [6]:
df.shape

(5, 3)

In [8]:
# Despite the name, this is a pandas Series (see the dtype line in the output below), not a Python list.
name_list = df["Name"]

In [9]:
name_list

0     John
1     Anna
2    Peter
3    Linda
4     John
Name: Name, dtype: object

#### Data Cleaning

In [10]:
df["Age"]

0    35.0
1    28.0
2     NaN
3    32.0
4    35.0
Name: Age, dtype: float64

In [11]:
df['Age'].mean()

32.5

In [13]:
# Fill the missing Age with the column mean and assign the result back to the column.
# Calling fillna(..., inplace=True) on df['Age'] operates on an intermediate Series: recent
# pandas 2.x emits a chained-assignment FutureWarning for it, and under Copy-on-Write
# it leaves df unchanged.
df['Age'] = df['Age'].fillna(df['Age'].mean())

In [14]:
df

Unnamed: 0,Name,Age,Salary
0,John,35.0,2500
1,Anna,28.0,3500
2,Peter,32.5,4000
3,Linda,32.0,4000
4,John,35.0,2500


In [15]:
# Drop fully duplicated rows, keeping the first occurrence (the repeated John row is removed).
# The result is reassigned rather than using inplace=True, so the step is an explicit
# expression instead of a hidden in-place mutation of a frame that was already displayed.
df = df.drop_duplicates(keep='first')

In [16]:
df

Unnamed: 0,Name,Age,Salary
0,John,35.0,2500
1,Anna,28.0,3500
2,Peter,32.5,4000
3,Linda,32.0,4000


In [17]:
df['Name']

0     John
1     Anna
2    Peter
3    Linda
Name: Name, dtype: object

In [18]:
# Set Linda's salary to 4500. A single .loc call with a boolean row mask writes into df directly.
is_linda = df['Name'] == 'Linda'
df.loc[is_linda, 'Salary'] = 4500

In [19]:
df

Unnamed: 0,Name,Age,Salary
0,John,35.0,2500
1,Anna,28.0,3500
2,Peter,32.5,4000
3,Linda,32.0,4500


#### Data Manipulation

In [20]:
# Bonus column: 10% of each salary.
df['Bonus'] = 0.1 * df['Salary']

In [21]:
df

Unnamed: 0,Name,Age,Salary,Bonus
0,John,35.0,2500,250.0
1,Anna,28.0,3500,350.0
2,Peter,32.5,4000,400.0
3,Linda,32.0,4500,450.0


In [22]:
# Keep only the rows whose Age is strictly greater than 30.
age_over_30 = df['Age'] > 30
df_filtered = df.loc[age_over_30]

In [23]:
df_filtered

Unnamed: 0,Name,Age,Salary,Bonus
0,John,35.0,2500,250.0
2,Peter,32.5,4000,400.0
3,Linda,32.0,4500,450.0


In [24]:
# Reshape to one row per Name and one column per distinct Age, with Salary as the cell value.
# pivot() requires each (Name, Age) pair to be unique; that holds here because the duplicate
# row was dropped earlier. Combinations that do not occur show up as NaN in the output below.
df_pivot = df.pivot(index='Name', columns='Age', values='Salary')

In [25]:
df_pivot

Age,28.0,32.0,32.5,35.0
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Anna,3500.0,,,
John,,,,2500.0
Linda,,4500.0,,
Peter,,,4000.0,


In [24]:
# NOTE(review): the output recorded below, (3900, 18), equals the shape shown after data.csv is
# loaded later in the notebook; the frame at this point has 4 rows and 4 columns. This cell
# (a second In [24]) appears to have been run out of order. Re-run with Restart Kernel -> Run All.
df.shape

(3900, 18)

### Importing Dataset

In [26]:
# Rebinds df: from here on it refers to the data read from data.csv, not the toy frame built above.
df = pd.read_csv('data.csv')

In [29]:
df.head(10)

Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,Review Rating,Subscription Status,Shipping Type,Discount Applied,Promo Code Used,Previous Purchases,Payment Method,Frequency of Purchases
0,1,55,Male,Blouse,Clothing,53,Kentucky,L,Gray,Winter,3.1,Yes,Express,Yes,Yes,14,Venmo,Fortnightly
1,2,19,Male,Sweater,Clothing,64,Maine,L,Maroon,Winter,3.1,Yes,Express,Yes,Yes,2,Cash,Fortnightly
2,3,50,Male,Jeans,Clothing,73,Massachusetts,S,Maroon,Spring,3.1,Yes,Free Shipping,Yes,Yes,23,Credit Card,Weekly
3,4,21,Male,Sandals,Footwear,90,Rhode Island,M,Maroon,Spring,3.5,Yes,Next Day Air,Yes,Yes,49,PayPal,Weekly
4,5,45,Male,Blouse,Clothing,49,Oregon,M,Turquoise,Spring,2.7,Yes,Free Shipping,Yes,Yes,31,PayPal,Annually
5,6,46,Male,Sneakers,Footwear,20,Wyoming,M,White,Summer,2.9,Yes,Standard,Yes,Yes,14,Venmo,Weekly
6,7,63,Male,Shirt,Clothing,85,Montana,M,Gray,Fall,3.2,Yes,Free Shipping,Yes,Yes,49,Cash,Quarterly
7,8,27,Male,Shorts,Clothing,34,Louisiana,L,Charcoal,Winter,3.2,Yes,Free Shipping,Yes,Yes,19,Credit Card,Weekly
8,9,26,Male,Coat,Outerwear,97,West Virginia,L,Silver,Summer,2.6,Yes,Express,Yes,Yes,8,Venmo,Annually
9,10,57,Male,Handbag,Accessories,31,Missouri,M,Pink,Spring,4.8,Yes,2-Day Shipping,Yes,Yes,4,Cash,Quarterly


#### Dataset Shape

In [30]:
df.shape

(3900, 18)

#### Filter Rows by Review Rating

In [31]:
# Rows whose review rating is 4.0 or higher (the result is a filtered DataFrame, not a single column).
is_highly_rated = df["Review Rating"] >= 4.0
rating = df.loc[is_highly_rated]

In [32]:
rating

Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,Review Rating,Subscription Status,Shipping Type,Discount Applied,Promo Code Used,Previous Purchases,Payment Method,Frequency of Purchases
9,10,57,Male,Handbag,Accessories,31,Missouri,M,Pink,Spring,4.8,Yes,2-Day Shipping,Yes,Yes,4,Cash,Quarterly
10,11,53,Male,Shoes,Footwear,34,Arkansas,L,Purple,Fall,4.1,Yes,Store Pickup,Yes,Yes,26,Bank Transfer,Bi-Weekly
11,12,30,Male,Shorts,Clothing,68,Hawaii,S,Olive,Winter,4.9,Yes,Store Pickup,Yes,Yes,10,Bank Transfer,Fortnightly
12,13,61,Male,Coat,Outerwear,72,Delaware,M,Gold,Winter,4.5,Yes,Express,Yes,Yes,37,Venmo,Fortnightly
13,14,65,Male,Dress,Clothing,51,New Hampshire,M,Violet,Spring,4.7,Yes,Express,Yes,Yes,31,PayPal,Weekly
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3883,3884,34,Female,Hat,Accessories,38,North Dakota,XL,Purple,Fall,4.0,No,Next Day Air,No,No,22,Cash,Fortnightly
3884,3885,47,Female,Sandals,Footwear,29,Maryland,M,Brown,Fall,4.9,No,Express,No,No,3,PayPal,Weekly
3891,3892,36,Female,Dress,Clothing,30,Colorado,L,Peach,Winter,4.7,No,Free Shipping,No,No,6,Bank Transfer,Quarterly
3895,3896,40,Female,Hoodie,Clothing,28,Virginia,L,Turquoise,Summer,4.2,No,2-Day Shipping,No,No,32,Venmo,Weekly
