## Basics & Math

In [24]:
import numpy as np
import pandas as pd

In [25]:
# Retrieve first five DataFrame rows from ComputerSales.csv
cs_df = pd.read_csv('data/ComputerSales.csv')
cs_df.head()

Unnamed: 0,Sale ID,Contact,Sex,Age,State,Product ID,Product Type,Sale Price,Profit,Lead,Month,Year
0,1,Paul Thomas,M,43,OH,M01-F0024,Desktop,479.99,143.39,Website,January,2018
1,2,Margo Simms,F,37,WV,GT13-0024,Desktop,1249.99,230.89,Flyer 4,January,2018
2,3,Sam Stine,M,26,PA,I3670,Desktop,649.99,118.64,Website,February,2018
3,4,Moe Eggert,M,35,PA,I3593,Laptop,399.99,72.09,Website,March,2018
4,5,Jessica Elk,F,55,PA,15M-ED,Laptop,699.99,98.09,Flyer 4,March,2018


In [26]:
# Retrieve last five DataFrame rows from ComputerSales.csv
cs_df.tail()

Unnamed: 0,Sale ID,Contact,Sex,Age,State,Product ID,Product Type,Sale Price,Profit,Lead,Month,Year
34,35,Michelle Samms,F,46,NY,MY2J2LL,Tablet,999.99,146.69,Website,April,2020
35,36,Sally Struthers,F,45,NY,81TC00,Laptop,649.99,122.34,Website,April,2020
36,37,Jason Case,M,57,PA,M01-F0024,Desktop,479.99,143.39,Flyer 4,April,2020
37,38,Doug Johnson,M,51,PA,GA401IV,Laptop,1349.99,180.34,Website,May,2020
38,39,Moe Eggert,M,35,PA,I3593,Laptop,399.99,72.09,Website,May,2020


In [27]:
# Retrieve first n DataFrame rows from ComputerSales.csv (ex: n = 2)
cs_df[:2]

Unnamed: 0,Sale ID,Contact,Sex,Age,State,Product ID,Product Type,Sale Price,Profit,Lead,Month,Year
0,1,Paul Thomas,M,43,OH,M01-F0024,Desktop,479.99,143.39,Website,January,2018
1,2,Margo Simms,F,37,WV,GT13-0024,Desktop,1249.99,230.89,Flyer 4,January,2018


In [28]:
# Retrieve first n DataFrame rows with step m from ComputerSales.csv (ex: n = 5, m = 2)
cs_df[:5:2]

Unnamed: 0,Sale ID,Contact,Sex,Age,State,Product ID,Product Type,Sale Price,Profit,Lead,Month,Year
0,1,Paul Thomas,M,43,OH,M01-F0024,Desktop,479.99,143.39,Website,January,2018
2,3,Sam Stine,M,26,PA,I3670,Desktop,649.99,118.64,Website,February,2018
4,5,Jessica Elk,F,55,PA,15M-ED,Laptop,699.99,98.09,Flyer 4,March,2018


In [29]:
# Convert cs_df DataFrame to NumPy array
cs_df.to_numpy()

array([[1, 'Paul Thomas', 'M', 43, 'OH', 'M01-F0024', 'Desktop', 479.99,
        143.39, 'Website', 'January', 2018],
       [2, 'Margo Simms', 'F', 37, 'WV', 'GT13-0024', 'Desktop', 1249.99,
        230.89, 'Flyer 4', 'January', 2018],
       [3, 'Sam Stine', 'M', 26, 'PA', 'I3670', 'Desktop', 649.99,
        118.64, 'Website', 'February', 2018],
       [4, 'Moe Eggert', 'M', 35, 'PA', 'I3593', 'Laptop', 399.99, 72.09,
        'Website', 'March', 2018],
       [5, 'Jessica Elk', 'F', 55, 'PA', '15M-ED', 'Laptop', 699.99,
        98.09, 'Flyer 4', 'March', 2018],
       [6, 'Sally Struthers', 'F', 45, 'PA', 'GT13-0024', 'Desktop',
        1249.99, 230.89, 'Flyer 2', 'April', 2018],
       [7, 'Michelle Samms', 'F', 46, 'OH', 'GA401IV', 'Laptop', 1349.99,
        180.34, 'Email', 'May', 2018],
       [8, 'Mick Roberts', 'M', 23, 'OH', 'MY2J2LL', 'Tablet', 999.99,
        146.69, 'Website', 'July', 2018],
       [9, 'Ed Klondike', 'M', 52, 'OH', '81TC00', 'Laptop', 649.99,
        122.34

In [30]:
# Create such DataFrame from dictionary:

# one	two
# a	1.0	4.0
# b	2.0	NaN
# c	3.0	6.0
# d	NaN	7.0

dict_3 = {'one': pd.Series([1.,2.,3.], index=['a','b','c']),
         'two': pd.Series([4.,np.nan,6.,7.], index=['a','b','c','d'])}

df_2 = pd.DataFrame(dict_3)
df_2

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,
c,3.0,6.0
d,,7.0


In [33]:
# Replace nan values with 0 (in DF df_2)
df_2.fillna(0, inplace=True)
df_2

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,0.0
c,3.0,6.0
d,0.0,7.0


In [35]:
# Add second row to all rows of DataFrame df_2
row = df_2.iloc[1]
df_2.add(row, axis='columns')

Unnamed: 0,one,two
a,3.0,4.0
b,4.0,0.0
c,5.0,6.0
d,2.0,7.0


In [37]:
# Subtract first columns from all other columns of DataFrame df_2
col = df_2['one']
df_2.sub(col, axis=0)

Unnamed: 0,one,two
a,0.0,3.0
b,0.0,-2.0
c,0.0,3.0
d,0.0,7.0


In [39]:
# Check if DataFrame is empty
df_2.empty

False

In [46]:
# Create DataFrame df_5 with columns named 'A' and 'B' each having values ranging from 0 to 3 and 1 to 4 repectrfully
df_5 = pd.DataFrame({'A': range(3), 'B': range(1,4)})
df_5

Unnamed: 0,A,B
0,0,1
1,1,2
2,2,3


In [49]:
# Executre function on DataFrame df_5 that would take squre root of every value in it
df_5.transform(lambda x: np.sqrt(x))

Unnamed: 0,A,B
0,0.0,1.0
1,1.0,1.414214
2,1.414214,1.732051


In [52]:
# Executre function on DataFrame df_5 that would get every value in it to a power of two AND three
df_5.transform([lambda x: x**2, lambda x: x**3])

Unnamed: 0_level_0,A,B
Unnamed: 0_level_1,<lambda>,<lambda>
0,0,1
1,1,8
2,8,27
