## Imports

In [1]:
import pandas as pd

## read the fines.csv that you saved in the previous exercise

In [2]:
df = pd.read_csv('../data/fines.csv')
df.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,Y163O8161RUS,2,3200.0,Ford,Focus,1989
1,E432XX77RUS,1,6500.0,Toyota,Camry,1995
2,7184TT36RUS,1,2100.0,Ford,Focus,1984
3,X582HE161RUS,2,2000.0,Ford,Focus,2015
4,92918M178RUS,1,5700.0,Ford,Focus,2014


## iterations: in all the following subtasks, you need to calculate fines/refund*year for each row and create a new column with the calculated data and measure the time using the magic command %%timeit in the cell

In [3]:
%%time
new_col = []
for i in range(0, len(df)):
    new_col.append(df.iloc[i]['Fines'] / (df.iloc[i]
                   ['Refund'] * df.iloc[i]['Year']))
df['Fines / Refund * Year'] = new_col

CPU times: user 372 ms, sys: 6.73 ms, total: 379 ms
Wall time: 387 ms


In [4]:
df = df.drop(['Fines / Refund * Year'], axis=1)

In [5]:
%%time
new_col = []
for index, row in df.iterrows():
    new_col.append(row['Fines'] / (row['Refund'] * row['Year']))
df['Fines / Refund * Year'] = new_col

CPU times: user 111 ms, sys: 4.74 ms, total: 115 ms
Wall time: 121 ms


In [6]:
df = df.drop(['Fines / Refund * Year'], axis=1)

In [7]:
%%time
df['Fines / Refund * Year'] = df.apply(
    lambda row: row['Fines'] / (row['Refund'] * row['Year']), axis=1)

CPU times: user 24.9 ms, sys: 3.82 ms, total: 28.8 ms
Wall time: 32.2 ms


In [8]:
df = df.drop(['Fines / Refund * Year'], axis=1)

In [9]:
%%time
df['Fines / Refund * Year'] = df['Fines'] / (df['Refund'] * df['Year'])

CPU times: user 9.93 ms, sys: 5.76 ms, total: 15.7 ms
Wall time: 23.2 ms


In [10]:
df = df.drop(['Fines / Refund * Year'], axis=1)

In [11]:
%%time
df['Fines / Refund * Year'] = df['Fines'].values / \
    (df['Refund'].values * df['Year'].values)

CPU times: user 2.11 ms, sys: 1.01 ms, total: 3.12 ms
Wall time: 2.77 ms


In [12]:
df = df.drop(['Fines / Refund * Year'], axis=1)

## indexing: measure the time using the magic command %%timeit in the cell

In [13]:
%%time
df.loc[df['CarNumber'] == 'O136HO197RUS']

CPU times: user 1.67 ms, sys: 169 µs, total: 1.84 ms
Wall time: 1.84 ms


Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
715,O136HO197RUS,2,7800.0,Toyota,Corolla,1999
902,O136HO197RUS,2,7800.0,Toyota,Corolla,1998


In [14]:
df = df.set_index('CarNumber')
df.head()

Unnamed: 0_level_0,Refund,Fines,Make,Model,Year
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Y163O8161RUS,2,3200.0,Ford,Focus,1989
E432XX77RUS,1,6500.0,Toyota,Camry,1995
7184TT36RUS,1,2100.0,Ford,Focus,1984
X582HE161RUS,2,2000.0,Ford,Focus,2015
92918M178RUS,1,5700.0,Ford,Focus,2014


In [15]:
%%time
df.loc['O136HO197RUS']

CPU times: user 1.18 ms, sys: 62 µs, total: 1.24 ms
Wall time: 2.38 ms


Unnamed: 0_level_0,Refund,Fines,Make,Model,Year
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
O136HO197RUS,2,7800.0,Toyota,Corolla,1999
O136HO197RUS,2,7800.0,Toyota,Corolla,1998


## downcasting:

In [16]:
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Index: 930 entries, Y163O8161RUS to Y977PX77RUS
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Refund  930 non-null    int64  
 1   Fines   930 non-null    float64
 2   Make    930 non-null    object 
 3   Model   919 non-null    object 
 4   Year    930 non-null    int64  
dtypes: float64(1), int64(2), object(2)
memory usage: 212.6 KB


In [17]:
optimized = df.copy()

In [18]:
optimized['Fines'] = pd.to_numeric(optimized['Fines'], downcast='float')
optimized['Refund'] = pd.to_numeric(optimized['Refund'], downcast='integer')
optimized['Year'] = pd.to_numeric(optimized['Year'], downcast='integer')

In [19]:
optimized.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Index: 930 entries, Y163O8161RUS to Y977PX77RUS
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Refund  930 non-null    int8   
 1   Fines   930 non-null    float32
 2   Make    930 non-null    object 
 3   Model   919 non-null    object 
 4   Year    930 non-null    int16  
dtypes: float32(1), int16(1), int8(1), object(2)
memory usage: 197.1 KB


## categories:

In [20]:
optimized = optimized.reset_index()
optimized['CarNumber'] = optimized['Make'].astype('category')
optimized['Make'] = optimized['Make'].astype('category')
optimized['Model'] = optimized['Model'].astype('category')

In [21]:
optimized.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   CarNumber  930 non-null    category
 1   Refund     930 non-null    int8    
 2   Fines      930 non-null    float32 
 3   Make       930 non-null    category
 4   Model      919 non-null    category
 5   Year       930 non-null    int16   
dtypes: category(3), float32(1), int16(1), int8(1)
memory usage: 11.5 KB


## memory clean

In [22]:
%reset_selective -f df

In [23]:
df

NameError: name 'df' is not defined