# Section 6.1

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
df1 = pd.read_csv("Demo1.csv")

In [3]:
type(df1)

pandas.core.frame.DataFrame

In [4]:
df2 = pd.read_excel("Demo1.xlsx")

In [5]:
df2

Unnamed: 0,Table 1,Unnamed: 1
0,,
1,ID,Name
2,1,John
3,2,Sarah
4,3,Asheley
5,4,Bill
6,5,George


In [6]:
#great seaborn data sets to easily load
#https://github.com/mwaskom/seaborn-data

In [7]:
#reading in sample data - great for experimentation - for seaborn
#list of all read ins for sns datasets
df_anscombe=sns.load_dataset("anscombe")
df_attention=sns.load_dataset("attention")
df_brain_networks=sns.load_dataset("brain_networks")
df_car_crashes=sns.load_dataset("car_crashes")
df_diamonds=sns.load_dataset("diamonds")
df_dots=sns.load_dataset("dots")
df_exercise=sns.load_dataset("exercise")
df_flights=sns.load_dataset("flights")
df_fmri=sns.load_dataset("fmri")
df_gammas=sns.load_dataset("gammas")
df_geyser=sns.load_dataset("geyser")
df_iris=sns.load_dataset("iris")
df_mpg=sns.load_dataset("mpg")
df_penguins=sns.load_dataset("penguins")
df_planets=sns.load_dataset("planets")
df_tips=sns.load_dataset("tips")
df_titanic=sns.load_dataset("titanic")

In [8]:
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


# Section 6.2
## Creating & Transformating DataFrames

In [9]:
vehicles = {"Type": ["Car", "Suv", "Truck", "Scooter", "Bike"], "Cost": ["500","600", "800", "250", "75"]}

In [10]:
type(vehicles)

dict

In [11]:
df = pd.DataFrame(vehicles, columns = ["Type", "Cost"])

In [12]:
df

Unnamed: 0,Type,Cost
0,Car,500
1,Suv,600
2,Truck,800
3,Scooter,250
4,Bike,75


In [13]:
#flip the table
df_flip = df.T

In [14]:
df_flip

Unnamed: 0,0,1,2,3,4
Type,Car,Suv,Truck,Scooter,Bike
Cost,500,600,800,250,75


In [15]:
df_flip = df_flip.rename(columns = {
    df_flip.columns[0]:"Option 1",
    df_flip.columns[1]:"Option 2",
    df_flip.columns[2]:"Option 3",
    df_flip.columns[3]:"Option 4",
    df_flip.columns[4]:"Option 5"
})

In [16]:
df_flip

Unnamed: 0,Option 1,Option 2,Option 3,Option 4,Option 5
Type,Car,Suv,Truck,Scooter,Bike
Cost,500,600,800,250,75


In [17]:
df_next = df_flip.T

In [18]:
df_next

Unnamed: 0,Type,Cost
Option 1,Car,500
Option 2,Suv,600
Option 3,Truck,800
Option 4,Scooter,250
Option 5,Bike,75


In [19]:
df_next = df_next.reset_index()

In [20]:
df_next

Unnamed: 0,index,Type,Cost
0,Option 1,Car,500
1,Option 2,Suv,600
2,Option 3,Truck,800
3,Option 4,Scooter,250
4,Option 5,Bike,75


In [21]:
df_next = df_next.rename(columns={df_next.columns[0]: "Option"})

In [22]:
df_next.dtypes

Option    object
Type      object
Cost      object
dtype: object

In [23]:
df_next["Cost"] = df_next["Cost"].astype(float)
df_next

Unnamed: 0,Option,Type,Cost
0,Option 1,Car,500.0
1,Option 2,Suv,600.0
2,Option 3,Truck,800.0
3,Option 4,Scooter,250.0
4,Option 5,Bike,75.0


In [24]:
#25% discount on all options!
#this means we charge 75% of the original cost!
df_next["Sale Price"] = df_next["Cost"] * 0.75

In [25]:
df_next

Unnamed: 0,Option,Type,Cost,Sale Price
0,Option 1,Car,500.0,375.0
1,Option 2,Suv,600.0,450.0
2,Option 3,Truck,800.0,600.0
3,Option 4,Scooter,250.0,187.5
4,Option 5,Bike,75.0,56.25


In [26]:
df_next["Diff"] = df_next["Cost"] - df_next["Sale Price"]

In [27]:
df_next

Unnamed: 0,Option,Type,Cost,Sale Price,Diff
0,Option 1,Car,500.0,375.0,125.0
1,Option 2,Suv,600.0,450.0,150.0
2,Option 3,Truck,800.0,600.0,200.0
3,Option 4,Scooter,250.0,187.5,62.5
4,Option 5,Bike,75.0,56.25,18.75


# Section 6.3
## Groupby & Crosstab

In [28]:
import pandas as pd
import numpy as np
import seaborn as sns

In [29]:
df_tips = sns.load_dataset("tips")

In [30]:
df_tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [31]:
tip_by_sex = df_tips.groupby(["sex", "smoker"])["tip"].mean()

In [32]:
type(tip_by_sex)

pandas.core.series.Series

In [33]:
tip_by_sex

sex     smoker
Male    Yes       3.051167
        No        3.113402
Female  Yes       2.931515
        No        2.773519
Name: tip, dtype: float64

In [34]:
tip_by_sex = tip_by_sex.reset_index()

In [35]:
tip_by_sex

Unnamed: 0,sex,smoker,tip
0,Male,Yes,3.051167
1,Male,No,3.113402
2,Female,Yes,2.931515
3,Female,No,2.773519


In [36]:
tip_by_sex = tip_by_sex.rename(columns= {tip_by_sex.columns[1]: "Average Tip"})

In [37]:
tip_by_sex

Unnamed: 0,sex,Average Tip,tip
0,Male,Yes,3.051167
1,Male,No,3.113402
2,Female,Yes,2.931515
3,Female,No,2.773519


In [38]:
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [39]:
this = pd.crosstab(columns= df_tips["sex"], index = df_tips["smoker"])

In [40]:
this

sex,Male,Female
smoker,Unnamed: 1_level_1,Unnamed: 2_level_1
Yes,60,33
No,97,54


In [41]:
python_list = [1,2,3]

In [42]:
numpy_array = np.array([1,2,3])

In [43]:
print(python_list+python_list)

[1, 2, 3, 1, 2, 3]


In [44]:
print(numpy_array+numpy_array)

[2 4 6]


In [45]:
np.array([True, 1, 2]) + np.array([3, 4, False])

array([4, 5, 2])

In [46]:

np.array([True, 1, 2, 3, 4, False])

array([1, 1, 2, 3, 4, 0])

In [47]:

np.array([4, 3, 0]) + np.array([0, 2, 2])

array([4, 5, 2])

In [48]:
x = ["a", "b", "c"]
x[1]

np_x = np.array(x)
np_x[1]

'b'