In [37]:
import pandas as pd
import numpy as np
import requests

## read the JSON file that you saved in ex02

In [38]:
# Render floats with two decimals in every DataFrame shown below.
pd.set_option("display.float_format", '{:.2f}'.format)
try:
    # The file is expected to hold a list of records (one JSON object per row).
    data = pd.read_json("../data/auto.json", orient='records')
except (OSError, ValueError) as e:
    # OSError covers a missing/unreadable file (IOError is an alias of it);
    # ValueError is what pandas raises when the content cannot be parsed.
    # Re-raise instead of calling quit(): the traceback stays visible and a
    # "Run All" stops at this cell rather than continuing without `data`.
    print(e)
    raise
print(data)
data

        CarNumber  Refund    Fines    Make    Model
0    Y163O8161RUS       2  3200.00    Ford    Focus
1     E432XX77RUS       1  6500.00  Toyota    Camry
2     7184TT36RUS       1  2100.00    Ford    Focus
3    X582HE161RUS       2  2000.00    Ford    Focus
4    92918M178RUS       1  5700.00    Ford    Focus
..            ...     ...      ...     ...      ...
720  Y163O8161RUS       2  1600.00    Ford    Focus
721  M0309X197RUS       1 22300.00    Ford    Focus
722  O673E8197RUS       2   600.00    Ford    Focus
723  8610T8154RUS       1  2000.00    Ford    Focus
724  H419XE197RUS       2  8594.59  Toyota  Corolla

[725 rows x 5 columns]


Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.00,Ford,Focus
1,E432XX77RUS,1,6500.00,Toyota,Camry
2,7184TT36RUS,1,2100.00,Ford,Focus
3,X582HE161RUS,2,2000.00,Ford,Focus
4,92918M178RUS,1,5700.00,Ford,Focus
...,...,...,...,...,...
720,Y163O8161RUS,2,1600.00,Ford,Focus
721,M0309X197RUS,1,22300.00,Ford,Focus
722,O673E8197RUS,2,600.00,Ford,Focus
723,8610T8154RUS,1,2000.00,Ford,Focus


## enrich the dataframe using a sample from that dataframe

In [39]:
# Draw 200 existing rows, then replace their Fines / Refund with values drawn
# independently from the original columns, so the new observations keep real
# CarNumber / Make / Model combinations but carry different amounts.
#
# A single generator seeded with 21 drives all three draws. The original used
# unseeded RandomState() objects for the two columns, which made the sample
# differ on every run. The first draw from a generator seeded with 21 is
# expected to select the same rows as `random_state=21` did.
sample_rng = np.random.RandomState(21)
# Explicit copy so the column assignments below target an independent frame.
smpl = data.sample(n=200, random_state=sample_rng).copy()
# `.values` drops the sampled index, so the assignment is positional rather
# than aligned on row labels.
smpl['Fines'] = data['Fines'].sample(n=200, random_state=sample_rng).values
smpl['Refund'] = data['Refund'].sample(n=200, random_state=sample_rng).values
print(smpl)

        CarNumber  Refund    Fines        Make   Model
445  M0299X197RUS       2 12300.00        Ford   Focus
22   83298C154RUS       2  8594.59        Ford   Focus
93   H957HY161RUS       1   200.00        Ford   Focus
173   T941CC96RUS       2  4000.00        Ford   Focus
697  H966HY161RUS       2  2500.00        Ford   Focus
..            ...     ...      ...         ...     ...
14   8182XX154RUS       1 11400.00        Ford   Focus
623   X796TH96RUS       1  8600.00        Ford   Focus
498  T011MY163RUS       2  3500.00        Ford   Focus
536   T341CC96RUS       1  9500.00  Volkswagen  Passat
520   T119CT96RUS       2   600.00        Ford   Focus

[200 rows x 5 columns]


In [40]:
# Stack the sampled rows under the original frame; ignore_index=True gives the
# result a fresh 0..n-1 row index instead of keeping the sampled row labels.
concat_rows = pd.concat([data, smpl], ignore_index=True)
print(concat_rows)

        CarNumber  Refund    Fines        Make   Model
0    Y163O8161RUS       2  3200.00        Ford   Focus
1     E432XX77RUS       1  6500.00      Toyota   Camry
2     7184TT36RUS       1  2100.00        Ford   Focus
3    X582HE161RUS       2  2000.00        Ford   Focus
4    92918M178RUS       1  5700.00        Ford   Focus
..            ...     ...      ...         ...     ...
920  8182XX154RUS       1 11400.00        Ford   Focus
921   X796TH96RUS       1  8600.00        Ford   Focus
922  T011MY163RUS       2  3500.00        Ford   Focus
923   T341CC96RUS       1  9500.00  Volkswagen  Passat
924   T119CT96RUS       2   600.00        Ford   Focus

[925 rows x 5 columns]


## enrich the dataframe concat_rows with a new column of generated data

In [41]:
# Seed NumPy's global generator so the generated years are reproducible.
np.random.seed(21)
# One random year per row; the upper bound is exclusive, so values are 1980..2019.
years = np.random.randint(1980, 2020, size=len(concat_rows))
s = pd.Series(years, dtype='int', name='Year')
# Attach the new column by aligning on the row index of both objects.
fines = pd.concat([concat_rows, s], axis=1)
print(fines)

        CarNumber  Refund    Fines        Make   Model  Year
0    Y163O8161RUS       2  3200.00        Ford   Focus  1989
1     E432XX77RUS       1  6500.00      Toyota   Camry  1995
2     7184TT36RUS       1  2100.00        Ford   Focus  1984
3    X582HE161RUS       2  2000.00        Ford   Focus  2015
4    92918M178RUS       1  5700.00        Ford   Focus  2014
..            ...     ...      ...         ...     ...   ...
920  8182XX154RUS       1 11400.00        Ford   Focus  1981
921   X796TH96RUS       1  8600.00        Ford   Focus  1992
922  T011MY163RUS       2  3500.00        Ford   Focus  2007
923   T341CC96RUS       1  9500.00  Volkswagen  Passat  2005
924   T119CT96RUS       2   600.00        Ford   Focus  1997

[925 rows x 6 columns]


## enrich the dataframe with the data from another dataframe

In [42]:
# --- Download the surname table -------------------------------------------
url = "https://projects.intra.42.fr/uploads/document/document/8786/surname.json"
headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'}
try:
    # A timeout keeps the cell from hanging forever if the server never answers.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    sn = pd.DataFrame.from_records(r.json())
except requests.RequestException as e:
    # Connection problems, HTTP error statuses and (in recent requests
    # versions) JSON decoding errors all derive from RequestException.
    # Re-raise instead of quit(): the traceback stays visible and the
    # notebook stops here rather than running on without `sn`.
    print(f"Exception: {e}")
    raise
except ValueError as e:
    # JSON decoding errors that are not RequestException subclasses
    # (the case for older requests versions).
    print(f"Exception: {e}")
    raise

# The first record holds the column names: promote it to the header and
# keep only the remaining rows as data.
nh = sn.iloc[0]
sn = sn[1:]
sn.columns = nh

# --- Build the owners table: one randomly drawn surname per distinct car ---
cars = pd.Series(fines.CarNumber.unique()).rename('CarNumber')
surnames = sn.NAME.sample(n=len(cars), random_state=21, replace=True, ignore_index=True).rename('SURNAME')
owners = pd.concat([cars, surnames], axis=1)

# --- Add five cars to `fines` that have no owner record --------------------
# NOTE(review): `fines` is extended from itself, so re-running this cell
# without re-running the previous one appends these rows again.
dict1 = {"CarNumber": ["Y163O8160RUS", "9184UT36RUS", "PY316E877RUS", "Y318P876RUS", "O05T8196RUS"],
        "Refund": [2, 1, 3, 1, 2],
        "Fines": [2300.00, 4500.00, 3600.00, 500.00, 1100.00],
        "Make": ["Renault", "Opel", "Hyundai", "Dodge", "Nissan"],
        "Model": ["Logan", "Astra", "Solaris", "Viper", "GT-X"],
        "Year": [2006, 2003, 2010, 2007, 2015]}
new_obs1 = pd.DataFrame(dict1)
fines = pd.concat([fines, new_obs1], axis=0, ignore_index=True)

# --- Drop the last 20 owners, then add three owners with no row in `fines` -
# Positional slice instead of an in-place drop over a computed label range.
owners = owners.iloc[:-20]
dict2 = {"CarNumber": ["U761HY137RUS", "2367J8147RUS", "A0306X159RUS"],
         "SURNAME": ["BAKER", "LONG", "KING"]}
new_obs2 = pd.DataFrame(dict2)
owners = pd.concat([owners, new_obs2], axis=0, ignore_index=True)


514