# Production Dataset

In [1]:
import pandas as pd
import numpy as np
import uuid
import random
from faker import Faker
import string
from datetime import datetime, timedelta, date

In [2]:
df = pd.read_csv("vehicles.csv")

In [3]:
def fill_na(column):
    actual = column.dropna().tolist()
    return column.apply(lambda x: x if pd.notna(x) else random.choice(actual))

def fill_na_vin(column):
    actual = column.dropna().tolist()
    vin_chars = string.ascii_uppercase.replace('I', '').replace('O', '').replace('Q', '') + string.digits

    def generate_random_vin():
        return ''.join(random.choices(vin_chars, k=17))
    
    return column.apply(lambda x: x if pd.notna(x) else generate_random_vin())

def generate_random_dates(n, start_date, end_date):
    start_u = start_date.value // 10**9
    end_u = end_date.value // 10**9
    return pd.to_datetime(np.random.randint(start_u, end_u, n), unit='s')

In [4]:
df = df[27:]
df = df.reset_index().drop(["county", "size", "index"], axis=1)

In [5]:
fake = Faker()

df['drive'] = fill_na(df['drive'])
df['cylinders'] = fill_na(df['cylinders'])
df['paint_color'] = fill_na(df['paint_color'])
df['model'] = fill_na(df['model'])
df['manufacturer'] = fill_na(df['manufacturer'])
df['condition'] = fill_na(df['condition'])
df['fuel'] = fill_na(df['fuel'])
df['type'] = fill_na(df['type'])
df['VIN'] = fill_na_vin(df['VIN'])
df['description'] = [fake.text(max_nb_chars=200) for _ in range(len(df))]

avg_price = df["price"].mean()
df["price"] = (df["price"] / avg_price) * 200

df['cylinders'] = df['cylinders'].str.extract('(\d+)').astype(float)
df['cylinders'].fillna(df['cylinders'].dropna().sample(n=1).values[0], inplace=True)
df['cylinders'] = df['cylinders'].astype(int)


df['paint_color'] = df['paint_color'].replace('custom', 'white')
df['transmission'] = df['transmission'].replace('other', 'automatic')

df = df.rename(columns={
    "VIN": "vin"
})

end_date = pd.to_datetime('now')
start_date = end_date - pd.DateOffset(years=1)
df['posting_date'] = generate_random_dates(len(df), start_date, end_date)

df

  result, tz_parsed = tslib.array_to_datetime(


Unnamed: 0,id,url,region,region_url,price,year,manufacturer,model,condition,cylinders,...,vin,drive,type,paint_color,image_url,description,state,lat,long,posting_date
0,7316814884,https://auburn.craigslist.org/ctd/d/auburn-uni...,auburn,https://auburn.craigslist.org,89.331452,2014.0,gmc,sierra 1500 crew cab slt,good,8,...,3GTP1VEC4EG551563,fwd,pickup,white,https://images.craigslist.org/00R0R_lwWjXSEWNa...,Anyone among build scientist. Hair rise feelin...,al,32.590000,-85.480000,2023-11-26 10:12:55
1,7316814758,https://auburn.craigslist.org/ctd/d/auburn-uni...,auburn,https://auburn.craigslist.org,60.077330,2010.0,chevrolet,silverado 1500,good,8,...,1GCSCSE06AZ123805,rwd,pickup,blue,https://images.craigslist.org/00R0R_lwWjXSEWNa...,Choice friend nearly keep activity poor. Food ...,al,32.590000,-85.480000,2023-07-31 04:19:22
2,7316814989,https://auburn.craigslist.org/ctd/d/auburn-uni...,auburn,https://auburn.craigslist.org,105.288246,2020.0,chevrolet,silverado 1500 crew,good,8,...,3GCPWCED5LG130317,fwd,pickup,red,https://images.craigslist.org/01212_jjirIWa0y0...,Carry research town establish. Begin total gov...,al,32.590000,-85.480000,2023-11-13 15:09:34
3,7316743432,https://auburn.craigslist.org/ctd/d/auburn-uni...,auburn,https://auburn.craigslist.org,82.416841,2017.0,toyota,tundra double cab sr,good,8,...,5TFRM5F17HX120972,fwd,pickup,red,https://images.craigslist.org/00x0x_1y9kIOzGCF...,Campaign itself style rich still exactly maybe...,al,32.590000,-85.480000,2024-03-10 03:05:55
4,7316356412,https://auburn.craigslist.org/cto/d/auburn-uni...,auburn,https://auburn.craigslist.org,39.891985,2013.0,ford,f-150 xlt,excellent,6,...,2WNSSJ7DTDE5HFGVG,rwd,truck,black,https://images.craigslist.org/00404_l4loxHvdQe...,Floor reality because through. Table public ec...,al,32.592000,-85.518900,2023-10-25 10:25:54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
426848,7301591192,https://wyoming.craigslist.org/ctd/d/atlanta-2...,wyoming,https://wyoming.craigslist.org,62.736795,2019.0,nissan,maxima s sedan 4d,good,6,...,1N4AA6AV6KC367801,fwd,sedan,red,https://images.craigslist.org/00o0o_iiraFnHg8q...,Amount half bank race investment move final. D...,wy,33.786500,-84.445400,2024-03-19 16:17:10
426849,7301591187,https://wyoming.craigslist.org/ctd/d/atlanta-2...,wyoming,https://wyoming.craigslist.org,81.353055,2020.0,volvo,s60 t5 momentum sedan 4d,good,8,...,7JR102FKXLG042696,fwd,sedan,red,https://images.craigslist.org/00x0x_15sbgnxCIS...,Forward think up natural table beautiful milit...,wy,33.786500,-84.445400,2024-04-22 18:18:29
426850,7301591147,https://wyoming.craigslist.org/ctd/d/atlanta-2...,wyoming,https://wyoming.craigslist.org,93.054704,2020.0,cadillac,xt4 sport suv 4d,good,4,...,1GYFZFR46LF088296,fwd,hatchback,white,https://images.craigslist.org/00L0L_farM7bxnxR...,Those compare ground claim floor whatever. Sud...,wy,33.779214,-84.411811,2023-06-24 23:25:40
426851,7301591140,https://wyoming.craigslist.org/ctd/d/atlanta-2...,wyoming,https://wyoming.craigslist.org,77.097910,2018.0,lexus,es 350 sedan 4d,good,6,...,58ABK1GG4JU103853,fwd,sedan,silver,https://images.craigslist.org/00z0z_bKnIVGLkDT...,Give catch lawyer material. Process fight afte...,wy,33.786500,-84.445400,2023-09-08 09:42:30


In [7]:
df.columns

Index(['id', 'url', 'region', 'region_url', 'price', 'year', 'manufacturer',
       'model', 'condition', 'cylinders', 'fuel', 'odometer', 'title_status',
       'transmission', 'vin', 'drive', 'type', 'paint_color', 'image_url',
       'description', 'state', 'lat', 'long', 'posting_date'],
      dtype='object')

In [23]:
unique_cars = df[['manufacturer', 'model']].drop_duplicates().reset_index(drop=True)
unique_cars['car_id'] = range(1, len(unique_cars) + 1)

cars_table = df[["id", "manufacturer", "model", "drive", "cylinders", "transmission", "paint_color"]]

cars_table = cars_table.merge(unique_cars, on=['manufacturer', 'model'], how='left').drop_duplicates().reset_index(drop=True)

cars_table

Unnamed: 0,id,manufacturer,model,drive,cylinders,transmission,paint_color,car_id
0,7316814884,gmc,sierra 1500 crew cab slt,4wd,8,automatic,white,1
1,7316814758,chevrolet,silverado 1500,fwd,8,automatic,blue,2
2,7316814989,chevrolet,silverado 1500 crew,rwd,8,automatic,red,3
3,7316743432,toyota,tundra double cab sr,fwd,8,automatic,red,4
4,7316356412,ford,f-150 xlt,rwd,6,automatic,black,5
...,...,...,...,...,...,...,...,...
426848,7301591192,nissan,maxima s sedan 4d,fwd,6,automatic,blue,1099
426849,7301591187,volvo,s60 t5 momentum sedan 4d,fwd,6,automatic,red,56
426850,7301591147,cadillac,xt4 sport suv 4d,fwd,6,automatic,white,28
426851,7301591140,lexus,es 350 sedan 4d,fwd,6,automatic,silver,1289


In [24]:
cars_table[(cars_table['manufacturer'] == 'volvo') & (cars_table['model'] == 's60 t5 momentum sedan 4d')]

Unnamed: 0,id,manufacturer,model,drive,cylinders,transmission,paint_color,car_id
62,7310308166,volvo,s60 t5 momentum sedan 4d,fwd,6,automatic,silver,56
63,7310276274,volvo,s60 t5 momentum sedan 4d,fwd,6,automatic,black,56
1941,7310344611,volvo,s60 t5 momentum sedan 4d,fwd,8,automatic,black,56
1942,7310319913,volvo,s60 t5 momentum sedan 4d,fwd,6,automatic,black,56
21142,7301594863,volvo,s60 t5 momentum sedan 4d,fwd,6,automatic,red,56
28748,7306238668,volvo,s60 t5 momentum sedan 4d,fwd,8,automatic,black,56
69530,7310350865,volvo,s60 t5 momentum sedan 4d,fwd,4,automatic,black,56
72404,7302668967,volvo,s60 t5 momentum sedan 4d,fwd,8,automatic,red,56
74290,7309400415,volvo,s60 t5 momentum sedan 4d,fwd,4,automatic,red,56
79663,7309250358,volvo,s60 t5 momentum sedan 4d,fwd,4,automatic,white,56


In [8]:
colors = df["paint_color"].dropna().unique()
colors_table = pd.DataFrame(colors, columns=["color"])

hex_dict = {
    'black': '#000000',
    'blue': '#0000FF',
    'brown': '#A52A2A',
    'green': '#008000',
    'grey': '#808080',
    'orange': '#FFA500',
    'purple': '#800080',
    'red': '#FF0000',
    'silver': '#C0C0C0',
    'white': '#FFFFFF',
    'yellow': '#FFFF00',
}

colors_table['hex'] = colors_table['color'].map(hex_dict)

colors_table

Unnamed: 0,color,hex
0,white,#FFFFFF
1,blue,#0000FF
2,red,#FF0000
3,black,#000000
4,silver,#C0C0C0
5,grey,#808080
6,brown,#A52A2A
7,green,#008000
8,yellow,#FFFF00
9,orange,#FFA500


In [9]:
fake = Faker()

num_users = 500

user_data = {
    'user_id': random.sample(range(10000000, 100000000), num_users), # 8 digit user_id
    'first_name': [fake.first_name() for _ in range(num_users)],
    'last_name': [fake.last_name() for _ in range(num_users)],
    'email': [fake.email() for _ in range(num_users)],
    'date_of_birth': [fake.date_of_birth(minimum_age=18, maximum_age=90) for _ in range(num_users)],
    'password': [fake.password(length=10, special_chars=True, digits=True, upper_case=True, lower_case=True) for _ in range(num_users)]
}

users_table = pd.DataFrame(user_data)
users_table

Unnamed: 0,user_id,first_name,last_name,email,date_of_birth,password
0,27212364,Brittany,Murphy,maria76@example.net,1984-09-15,%QSqQ5nq(7
1,32317483,Brandon,Ayala,vpace@example.com,1969-11-14,O!9O4qF8(I
2,34518768,Alexandra,Parker,jacqueline86@example.net,1951-09-05,r238YEmZ*W
3,73347600,Jessica,Sandoval,mgardner@example.org,1961-01-05,w#z4NzZm3+
4,19650163,Latoya,Neal,cooperstephanie@example.net,1941-12-07,VolIKelE^3
...,...,...,...,...,...,...
495,31874198,William,Thomas,robertsonjeremiah@example.org,1956-06-09,%gP3NJWz%L
496,75466571,Adam,Gibbs,brandonjones@example.com,1945-04-18,k3iPkqpe_5
497,69531356,Kristy,Gray,victoranderson@example.org,1992-07-28,C0KzPRlV+(
498,56385499,Karen,Williams,jonathan29@example.net,1949-07-15,aIj87hwiE*


In [12]:
listings_table = df[["id", "url", "region", "image_url", "description", "condition", "vin", "lat", "long", "state", "price", "posting_date"]]
listings_table["car_id"] = cars_table['car_id']

listings_table = listings_table.rename(columns={
    "url": "listing_url",
    "lat": "lat_id",
    "long": "long_id",
    "state": "state_id"
})

listings_table

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_table["car_id"] = cars_table['car_id']


Unnamed: 0,id,listing_url,region,image_url,description,condition,vin,lat_id,long_id,state_id,price,posting_date,car_id
0,7316814884,https://auburn.craigslist.org/ctd/d/auburn-uni...,auburn,https://images.craigslist.org/00R0R_lwWjXSEWNa...,Carvana is the safer way to buy a car During t...,good,3GTP1VEC4EG551563,32.590000,-85.480000,al,89.331452,2023-07-28 16:49:09,1
1,7316814758,https://auburn.craigslist.org/ctd/d/auburn-uni...,auburn,https://images.craigslist.org/00R0R_lwWjXSEWNa...,Carvana is the safer way to buy a car During t...,good,1GCSCSE06AZ123805,32.590000,-85.480000,al,60.077330,2024-04-02 13:25:13,2
2,7316814989,https://auburn.craigslist.org/ctd/d/auburn-uni...,auburn,https://images.craigslist.org/01212_jjirIWa0y0...,Carvana is the safer way to buy a car During t...,good,3GCPWCED5LG130317,32.590000,-85.480000,al,105.288246,2024-03-23 01:04:24,3
3,7316743432,https://auburn.craigslist.org/ctd/d/auburn-uni...,auburn,https://images.craigslist.org/00x0x_1y9kIOzGCF...,Carvana is the safer way to buy a car During t...,good,5TFRM5F17HX120972,32.590000,-85.480000,al,82.416841,2024-04-02 12:42:57,4
4,7316356412,https://auburn.craigslist.org/cto/d/auburn-uni...,auburn,https://images.craigslist.org/00404_l4loxHvdQe...,2013 F-150 XLT V6 4 Door. Good condition. Leve...,excellent,506XWT6HAFWG1BAT0,32.592000,-85.518900,al,39.891985,2023-07-20 04:22:18,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
426848,7301591192,https://wyoming.craigslist.org/ctd/d/atlanta-2...,wyoming,https://images.craigslist.org/00o0o_iiraFnHg8q...,Carvana is the safer way to buy a car During t...,good,1N4AA6AV6KC367801,33.786500,-84.445400,wy,62.736795,2024-04-22 13:43:38,1100
426849,7301591187,https://wyoming.craigslist.org/ctd/d/atlanta-2...,wyoming,https://images.craigslist.org/00x0x_15sbgnxCIS...,Carvana is the safer way to buy a car During t...,good,7JR102FKXLG042696,33.786500,-84.445400,wy,81.353055,2024-03-30 19:26:23,56
426850,7301591147,https://wyoming.craigslist.org/ctd/d/atlanta-2...,wyoming,https://images.craigslist.org/00L0L_farM7bxnxR...,Carvana is the safer way to buy a car During t...,good,1GYFZFR46LF088296,33.779214,-84.411811,wy,93.054704,2024-06-09 23:37:38,28
426851,7301591140,https://wyoming.craigslist.org/ctd/d/atlanta-2...,wyoming,https://images.craigslist.org/00z0z_bKnIVGLkDT...,Carvana is the safer way to buy a car During t...,good,58ABK1GG4JU103853,33.786500,-84.445400,wy,77.097910,2024-01-19 16:15:09,1292


In [13]:
import pandas as pd

state_data = {
    'state_id': ['al', 'ak', 'az', 'ar', 'ca', 'co', 'ct', 'dc', 'de', 'fl', 'ga',
                   'hi', 'id', 'il', 'in', 'ia', 'ks', 'ky', 'la', 'me', 'md', 'ma',
                   'mi', 'mn', 'ms', 'mo', 'mt', 'nc', 'ne', 'nv', 'nj', 'nm', 'ny',
                   'nh', 'nd', 'oh', 'ok', 'or', 'pa', 'ri', 'sc', 'sd', 'tn', 'tx',
                   'ut', 'vt', 'va', 'wa', 'wv', 'wi', 'wy'],
    'state_name': ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'District of Columbia', 
                   'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 
                   'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 
                   'Montana', 'North Carolina', 'Nebraska', 'Nevada', 'New Jersey', 'New Mexico', 'New York', 'New Hampshire', 
                   'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 
                   'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'],
    'time_zone': ['Central', 'Alaska', 'Mountain', 'Central', 'Pacific', 'Mountain', 'Eastern', 'Eastern', 'Eastern', 'Eastern', 
                  'Eastern', 'Hawaii-Aleutian', 'Mountain', 'Central', 'Eastern', 'Central', 'Central', 'Eastern', 'Central', 
                  'Eastern', 'Eastern', 'Eastern', 'Eastern', 'Central', 'Central', 'Central', 'Mountain', 'Eastern', 'Central', 
                  'Pacific', 'Eastern', 'Mountain', 'Eastern', 'Eastern', 'Central', 'Eastern', 'Central', 'Pacific', 'Eastern', 
                  'Eastern', 'Eastern', 'Central', 'Central', 'Central', 'Mountain', 'Eastern', 'Eastern', 'Pacific', 'Eastern', 
                  'Central', 'Mountain']
}

states_table = pd.DataFrame(state_data)
states_table

Unnamed: 0,state_id,state_name,time_zone
0,al,Alabama,Central
1,ak,Alaska,Alaska
2,az,Arizona,Mountain
3,ar,Arkansas,Central
4,ca,California,Pacific
5,co,Colorado,Mountain
6,ct,Connecticut,Eastern
7,dc,District of Columbia,Eastern
8,de,Delaware,Eastern
9,fl,Florida,Eastern


In [14]:
num_favorites = 750
favorite_data = {
    'id': [random.choice(cars_table['id']) for _ in range(num_favorites)],
    'user_id': [random.choice(users_table['user_id']) for _ in range(num_favorites)]
}

favorites_table = pd.DataFrame(favorite_data)
favorites_table['car_id'] = favorites_table['id'].map(cars_table.set_index('id')['car_id'].to_dict())

favorites_table

Unnamed: 0,id,user_id,car_id
0,7310314165,68558566,566
1,7315137757,80476560,239
2,7316138479,30931024,2846
3,7302194328,14129170,2
4,7315167224,39118733,1304
...,...,...,...
745,7309843508,31837777,656
746,7315676999,52395548,192
747,7314837762,40218374,33
748,7315150201,34887668,295


In [15]:
# # save da files
# cars_table.to_csv('cars_table.csv', index=False)
# colors_table.to_csv('colors_table.csv', index=False)
# users_table.to_csv('users_table.csv', index=False)
# rentals_table.to_csv('rentals_table.csv', index=False)
# listings_table.to_csv('listings_table.csv', index=False)
# favorites_table.to_csv('favorites_table.csv', index=False)
# states_table.to_csv('states_table.csv', index=False)

# Sample Dataset

In [6]:
sample_df = df.sample(n=20).reset_index(drop=True)
min_mileage = 5000
max_mileage = 200000
sample_df['mileage'] = [random.randint(min_mileage, max_mileage) for _ in range(len(sample_df))]

sample_df

Unnamed: 0,id,url,region,region_url,price,year,manufacturer,model,condition,cylinders,...,drive,type,paint_color,image_url,description,state,lat,long,posting_date,mileage
0,7310842213,https://raleigh.craigslist.org/ctd/d/raleigh-2...,raleigh / durham / CH,https://raleigh.craigslist.org,101.033101,2019.0,bmw,x3 xdrive30i sport utility,good,6,...,rwd,SUV,black,https://images.craigslist.org/00o0o_70u670pbXw...,Practice of you how argue nature sing. Staff u...,nc,35.82,-78.65,2023-08-20 18:45:44,66319
1,7308268142,https://savannah.craigslist.org/ctd/d/savannah...,savannah / hinesville,https://savannah.craigslist.org,90.395238,2018.0,jeep,wrangler unlimited willys,good,6,...,4wd,other,white,https://images.craigslist.org/00d0d_lQWs0jxSyo...,Her generation effect two home. Way should voi...,ga,32.08,-81.1,2024-05-24 12:59:44,109615
2,7316671476,https://chattanooga.craigslist.org/ctd/d/chatt...,chattanooga,https://chattanooga.craigslist.org,105.288246,2020.0,chevrolet,silverado 1500 crew,good,8,...,fwd,pickup,red,https://images.craigslist.org/01212_jjirIWa0y0...,After some work material industry idea season....,tn,35.06,-85.25,2023-10-05 02:45:06,136008
3,7312960878,https://wausau.craigslist.org/ctd/d/merrill-20...,wausau,https://wausau.craigslist.org,13.284031,2007.0,mercury,milan premier,good,4,...,4wd,other,black,https://images.craigslist.org/00V0V_fBjNmz6eAa...,It realize plan meeting leg.\nAbility customer...,wi,45.192647,-89.665496,2023-11-18 05:51:49,39262
4,7313394387,https://fortmyers.craigslist.org/col/ctd/d/nap...,ft myers / SW florida,https://fortmyers.craigslist.org,65.396261,2013.0,gmc,sierra 1500 extended cab slt,good,8,...,4wd,pickup,white,https://images.craigslist.org/00404_kyhHnNhxLm...,Answer size husband least forget. Because toge...,fl,26.14,-81.79,2024-04-22 19:28:59,20636
5,7314618037,https://omaha.craigslist.org/ctd/d/higginsvill...,omaha / council bluffs,https://omaha.craigslist.org,148.876889,2018.0,chevrolet,silverado 2500hd,good,8,...,4wd,sedan,silver,https://images.craigslist.org/00G0G_LtoktvJ5ve...,Water five law. Like cultural laugh member. Ci...,ia,39.0705,-93.7133,2023-11-14 18:45:23,114191
6,7315807724,https://sandiego.craigslist.org/csd/cto/d/del-...,san diego,https://sandiego.craigslist.org,74.199092,2018.0,mercedes-benz,benz c350e,excellent,4,...,rwd,sedan,black,https://images.craigslist.org/00Q0Q_l0rMAT8VuE...,Under serious hotel state my scientist. Decade...,ca,32.9555,-117.2252,2023-12-04 18:52:53,175294
7,7316764465,https://missoula.craigslist.org/ctd/d/coeur-al...,missoula,https://missoula.craigslist.org,77.119186,2019.0,volkswagen,tiguan se,good,4,...,4wd,SUV,blue,https://images.craigslist.org/00D0D_jwdKD9QnGY...,Executive foot quickly system me return. Hit a...,mt,47.696062,-116.781406,2024-01-08 11:19:59,58533
8,7311550702,https://ventura.craigslist.org/cto/d/ventura-w...,ventura county,https://ventura.craigslist.org,0.0,2000.0,jeep,wrangler,excellent,8,...,4wd,pickup,white,https://images.craigslist.org/01515_bXHzE3XHWN...,Offer they good loss. All girl social necessar...,ca,34.2788,-119.1651,2023-09-09 15:13:22,29469
9,7316951646,https://youngstown.craigslist.org/ctd/d/youngs...,youngstown,https://youngstown.craigslist.org,21.262428,2015.0,subaru,legacy awd 4dr. sedan,excellent,4,...,4wd,sedan,blue,https://images.craigslist.org/00202_2MfZlrVxlI...,Wait student become manager interest. Choice s...,oh,41.0252,-80.6687,2023-08-13 21:49:28,99449


In [7]:
unique_cars = sample_df[['manufacturer', 'model']].drop_duplicates().reset_index(drop=True)

sample_cars_table = sample_df[["manufacturer", "model", "drive", "cylinders", "transmission", "fuel"]]
sample_cars_table['drive'] = sample_cars_table['drive'].replace('other', 'gas')

# drop duplicates
sample_cars_table = sample_cars_table.drop_duplicates(subset=['manufacturer', 'model']).reset_index(drop=True)

sample_cars_table

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_cars_table['drive'] = sample_cars_table['drive'].replace('other', 'gas')


Unnamed: 0,manufacturer,model,drive,cylinders,transmission,fuel
0,bmw,x3 xdrive30i sport utility,rwd,6,automatic,gas
1,jeep,wrangler unlimited willys,4wd,6,automatic,gas
2,chevrolet,silverado 1500 crew,fwd,8,automatic,gas
3,mercury,milan premier,4wd,4,automatic,gas
4,gmc,sierra 1500 extended cab slt,4wd,8,automatic,other
5,chevrolet,silverado 2500hd,4wd,8,automatic,diesel
6,mercedes-benz,benz c350e,rwd,4,automatic,hybrid
7,volkswagen,tiguan se,4wd,4,manual,gas
8,jeep,wrangler,4wd,8,manual,gas
9,subaru,legacy awd 4dr. sedan,4wd,4,automatic,gas


In [8]:
num_users = 10

user_data = {
    'user_id': [str(uuid.uuid4())[:8] for _ in range(num_users)],
    'first_name': [fake.first_name() for _ in range(num_users)],
    'last_name': [fake.last_name() for _ in range(num_users)],
    'email': [fake.email() for _ in range(num_users)],
    'date_of_birth': [fake.date_of_birth(minimum_age=18, maximum_age=60) for _ in range(num_users)],
    'password': [fake.password(length=10, special_chars=True, digits=True, upper_case=True, lower_case=True) for _ in range(num_users)]
}


sample_users_table = pd.DataFrame(user_data)

sample_users_table

Unnamed: 0,user_id,first_name,last_name,email,date_of_birth,password
0,50d4e3b2,Xavier,Ward,sarah39@example.com,1983-02-16,*0(Fa5ncW6
1,76397991,Amanda,Peterson,hamptonchristine@example.net,1994-10-25,(U2gZrEwr!
2,346050b0,Joshua,Mosley,keith78@example.org,1993-10-12,f4C6H36z+I
3,04bc9af6,Nicole,Hutchinson,jonathon87@example.org,1992-07-11,Dc(_8kPx@_
4,6eb4d8fb,Robert,Price,osoto@example.org,1997-01-11,U&7@SDfXTH
5,32b272f2,Melissa,Trevino,robin71@example.net,1985-03-16,Pj0Bb#^w%k
6,f97c209b,Richard,Martinez,gregorygene@example.net,1992-03-24,%f$onUMtM5
7,90104220,Eddie,Smith,riveradanny@example.org,1985-06-20,%ye!DHef48
8,b1b071b2,Matthew,Mathis,thomasrobert@example.org,1986-12-19,3(hRaoj#!i
9,25d1cb2b,Tyler,Taylor,callen@example.net,1972-10-01,taaIv+96+4


In [9]:
fake = Faker()

num_owners = 10

owner_data = {
    'user_id': [str(uuid.uuid4())[:8] for _ in range(num_owners)],
    'first_name': [fake.first_name() for _ in range(num_owners)],
    'last_name': [fake.last_name() for _ in range(num_owners)],
    'email': [fake.email() for _ in range(num_owners)],
    'date_of_birth': [fake.date_of_birth(minimum_age=18, maximum_age=60) for _ in range(num_owners)],
    'password': [fake.password(length=10, special_chars=True, digits=True, upper_case=True, lower_case=True) for _ in range(num_owners)],
    'status': [random.choice(['active', 'inactive', 'banned']) for _ in range(num_owners)],
    'dealer_rating': [random.randint(1, 5) for _ in range(num_owners)],
    'dealer_num_ratings': [random.randint(1, 1000) for _ in range(num_owners)]
}

sample_owners_table = pd.DataFrame(owner_data)
sample_owners_table

Unnamed: 0,user_id,first_name,last_name,email,date_of_birth,password,status,dealer_rating,dealer_num_ratings
0,8ca4cbeb,Steven,Mccormick,sawyereric@example.org,2002-02-21,%z3BRHtrC7,active,1,754
1,b78c5eed,Bailey,Gordon,lwilliams@example.net,1964-03-31,)PW*TktXA2,inactive,3,230
2,96a93939,Jacob,Boyd,etucker@example.com,2001-11-23,L9a37V6y#J,active,2,708
3,b00d4bc2,Molly,Walters,shannon91@example.com,1970-03-26,Z9tLiAw1@9,active,3,233
4,614ae04f,Emily,Smith,sandradavis@example.net,1986-11-12,+N@3uzVxl1,active,4,421
5,e9a297d7,Monica,Lewis,jennifer64@example.org,1986-08-23,O)8^Q*wAde,active,5,78
6,1f9f9c10,Amy,Logan,kwood@example.org,2003-05-20,0wq9BILw#3,inactive,3,328
7,82c4c3bf,Rita,Weaver,robleskatherine@example.org,1968-11-07,@RHKV#NoD7,banned,2,572
8,2d2a66b0,Eric,Valenzuela,pottspatrick@example.com,1991-10-26,$rPTn^ZMq3,inactive,2,393
9,b1f64ad0,Kevin,Knight,bradleyjones@example.org,1989-08-25,adJVJrWn^9,active,4,557


In [10]:
sample_listings_table = sample_df[["id", "manufacturer", "model", "region", "image_url", "description", "condition", "state", "posting_date", "mileage"]]
sample_listings_table = sample_listings_table.rename(columns={
    "id": "listing_id",
    "state": "state_id"
})


sample_listings_table['status'] = [random.choice(['active', 'inactive']) for _ in range(len(sample_listings_table))]
sample_listings_table['price'] = [random.uniform(1, 60) for _ in range(len(sample_listings_table))]
sample_listings_table['posting_date'] = pd.to_datetime(sample_listings_table['posting_date'], errors='coerce')
sample_listings_table['posting_date'] = sample_listings_table['posting_date'].apply(lambda x: x if x.year > 1900 else datetime.now())
sample_listings_table['owner_id'] = np.random.choice(sample_owners_table['user_id'], size=len(sample_listings_table))
sample_listings_table['rent_time'] = [random.randint(5, 30) for _ in range(len(sample_listings_table))]

sample_listings_table

Unnamed: 0,listing_id,manufacturer,model,region,image_url,description,condition,state_id,posting_date,mileage,status,price,owner_id,rent_time
0,7310842213,bmw,x3 xdrive30i sport utility,raleigh / durham / CH,https://images.craigslist.org/00o0o_70u670pbXw...,Practice of you how argue nature sing. Staff u...,good,nc,2023-08-20 18:45:44,66319,inactive,15.730219,2d2a66b0,27
1,7308268142,jeep,wrangler unlimited willys,savannah / hinesville,https://images.craigslist.org/00d0d_lQWs0jxSyo...,Her generation effect two home. Way should voi...,good,ga,2024-05-24 12:59:44,109615,inactive,40.374408,b00d4bc2,22
2,7316671476,chevrolet,silverado 1500 crew,chattanooga,https://images.craigslist.org/01212_jjirIWa0y0...,After some work material industry idea season....,good,tn,2023-10-05 02:45:06,136008,inactive,50.279173,b1f64ad0,21
3,7312960878,mercury,milan premier,wausau,https://images.craigslist.org/00V0V_fBjNmz6eAa...,It realize plan meeting leg.\nAbility customer...,good,wi,2023-11-18 05:51:49,39262,inactive,59.943741,e9a297d7,10
4,7313394387,gmc,sierra 1500 extended cab slt,ft myers / SW florida,https://images.craigslist.org/00404_kyhHnNhxLm...,Answer size husband least forget. Because toge...,good,fl,2024-04-22 19:28:59,20636,active,23.960843,e9a297d7,30
5,7314618037,chevrolet,silverado 2500hd,omaha / council bluffs,https://images.craigslist.org/00G0G_LtoktvJ5ve...,Water five law. Like cultural laugh member. Ci...,good,ia,2023-11-14 18:45:23,114191,inactive,25.038366,b78c5eed,23
6,7315807724,mercedes-benz,benz c350e,san diego,https://images.craigslist.org/00Q0Q_l0rMAT8VuE...,Under serious hotel state my scientist. Decade...,excellent,ca,2023-12-04 18:52:53,175294,inactive,42.74532,96a93939,16
7,7316764465,volkswagen,tiguan se,missoula,https://images.craigslist.org/00D0D_jwdKD9QnGY...,Executive foot quickly system me return. Hit a...,good,mt,2024-01-08 11:19:59,58533,inactive,20.294453,2d2a66b0,24
8,7311550702,jeep,wrangler,ventura county,https://images.craigslist.org/01515_bXHzE3XHWN...,Offer they good loss. All girl social necessar...,excellent,ca,2023-09-09 15:13:22,29469,active,51.406886,e9a297d7,26
9,7316951646,subaru,legacy awd 4dr. sedan,youngstown,https://images.craigslist.org/00202_2MfZlrVxlI...,Wait student become manager interest. Choice s...,excellent,oh,2023-08-13 21:49:28,99449,active,47.440137,82c4c3bf,19


In [11]:
num_reviews = 15

car_review_phrases = [
    "The car runs smoothly and has great mileage.",
    "I love the interior design and the comfort it offers.",
    "The engine performance is exceptional!",
    "Very reliable and fuel-efficient.",
    "The car looks stylish and is very easy to handle.",
    "I've had some issues with the transmission.",
    "The car is a bit overpriced for what it offers.",
    "Excellent customer service from the dealer.",
    "I had a great buying experience and the car is fantastic.",
    "The sound system in the car is top-notch.",
    "It's a decent car but lacks advanced features.",
    "Very spacious and comfortable for long drives.",
    "The air conditioning system works perfectly.",
    "I had to take it for repairs sooner than expected.",
    "The car offers a smooth and quiet ride.",
    "The handling is responsive and precise.",
    "The fuel economy could be better.",
    "Comfortable seating for long trips.",
    "The car's safety features are impressive.",
    "Great value for money.",
    "The exterior design is eye-catching.",
    "The dashboard layout is intuitive and user-friendly.",
    "Acceleration is swift and powerful.",
    "The car is perfect for city driving.",
    "The brakes are sensitive and reliable.",
    "The headlights provide excellent visibility.",
    "The suspension absorbs bumps well.",
    "The seats are supportive and adjustable.",
    "Road noise is minimal.",
    "The car's reliability rating is high.",
    "Cargo space is ample for groceries and luggage.",
    "The steering wheel feels comfortable in hand.",
    "The infotainment system is easy to use.",
    "The car handles well in all weather conditions.",
    "The paint quality is durable and glossy.",
    "The mileage per gallon is impressive.",
    "Maintenance costs are reasonable.",
    "The seats are heated and ventilated.",
    "The navigation system is accurate and responsive.",
    "The car has good resale value.",
    "I feel safe driving this car.",
    "The climate control system is effective.",
    "The rearview camera provides a clear view.",
    "The car accelerates smoothly from a stop.",
    "The gearbox shifts gears seamlessly.",
    "The suspension offers a comfortable ride.",
    "The cabin is quiet at highway speeds.",
    "The car's warranty coverage is comprehensive.",
    "The touchscreen interface is intuitive.",
    "The car handles curves with confidence.",
    "The interior materials are high-quality.",
    "The car's technology features are impressive.",
    "The seats have plenty of legroom.",
    "The steering is precise and responsive.",
    "The car's braking system is reliable.",
    "The adaptive cruise control works well.",
    "The car's design is timeless.",
    "The engine is powerful yet fuel-efficient.",
    "The car's Bluetooth connectivity is seamless.",
    "The dashboard displays essential information clearly.",
    "The sound quality of the audio system is excellent.",
    "The car has good visibility all around.",
    "The car's headlights illuminate the road well.",
    "The seats are easy to adjust for comfort.",
    "The car feels stable at high speeds.",
    "The steering is light and easy to maneuver.",
    "The car has plenty of storage compartments.",
    "The car's handling is agile and responsive.",
    "The seats provide excellent lumbar support.",
    "The car's resale value holds up well.",
    "The interior is spacious and well-designed.",
    "The car accelerates quickly when needed.",
    "The car's fuel tank is large for long trips.",
    "The car's exterior color is vibrant.",
    "The car's dashboard is well-organized.",
    "The car's entertainment system keeps passengers entertained.",
    "The car's suspension absorbs bumps smoothly.",
    "The car's cabin is insulated from road noise.",
    "The car's climate control keeps the cabin comfortable.",
    "The car's seats are plush and comfortable.",
    "The car's engine is quiet at idle.",
    "The car's transmission shifts gears seamlessly.",
    "The car's steering is precise and responsive.",
    "The car's brakes are strong and reliable.",
    "The car's fuel efficiency is impressive for its size.",
    "The car's safety features provide peace of mind.",
    "The car's infotainment system is intuitive to use.",
    "The car's interior design is modern and attractive.",
    "The car's exterior design is sporty and eye-catching.",
    "The car's ride quality is smooth and comfortable.",
    "The car's acceleration is brisk and powerful.",
    "The car's handling is nimble and agile.",
    "The car's suspension offers a balanced ride.",
    "The car's seats are supportive and ergonomic.",
    "The car's cabin is spacious and airy.",
    "The car's visibility is excellent from all angles.",
    "The car's technology features are cutting-edge.",
    "The car's fuel economy is efficient.",
    "The car's steering wheel feels solid and well-designed.",
    "The car's dashboard layout is logical and user-friendly.",
    "The car's rear seats are comfortable for passengers.",
    "The car's trunk space is generous for luggage.",
    "The car's exterior styling is elegant and refined.",
    "The car's interior materials are high-grade and durable.",
    "The car's ride is smooth and composed.",
    "The car's acceleration is strong and responsive.",
    "The car's handling is agile and precise.",
    "The car's brakes are powerful and reliable.",
    "The car's fuel efficiency is excellent.",
    "The car's safety features are advanced and effective.",
    "The car's infotainment system is intuitive and feature-rich.",
    "The car's interior design is stylish and modern.",
    "The car's exterior design is sleek and aerodynamic.",
    "The car's suspension absorbs bumps with ease.",
    "The car's seats are comfortable for long drives.",
    "The car's steering is light and responsive.",
    "The car's cabin is quiet at highway speeds.",
    "The car's visibility is clear and unobstructed.",
    "The car's technology enhances the driving experience.",
    "The car's climate control system is efficient and effective.",
    "The car's seats provide ample support and comfort.",
    "The car's audio system delivers crisp and clear sound.",
    "The car's interior is well-insulated from outside noise.",
    "The car's transmission shifts smoothly between gears.",
    "The car's exterior color is vibrant and attractive.",
    "The car's headlights provide excellent illumination.",
    "The car's touchscreen interface is user-friendly.",
    "The car's navigation system is accurate and reliable.",
    "The car's cargo space is ample for groceries and luggage.",
    "The car's steering response is precise and predictable.",
    "The car's suspension offers a smooth and controlled ride.",
    "The car's seats are plush and supportive.",
    "The car's cabin layout is practical and ergonomic.",
    "The car's exterior lines are sleek and modern.",
    "The car's engine delivers strong performance.",
    "The car's fuel economy is impressive for its class.",
    "The car's safety ratings are top-notch.",
    "The car's technology suite is comprehensive and intuitive.",
    "The car's interior materials are luxurious and refined.",
    "The car's exterior design is bold and distinctive.",
    "The car's ride quality is smooth and comfortable.",
    "The car's acceleration is brisk and exhilarating.",
    "The car's handling is precise and agile.",
    "The car's braking system is responsive and reliable.",
    "The car's fuel efficiency is excellent for its size.",
    "The car's safety features provide peace of mind.",
    "The car's infotainment system is easy to use.",
    "The car's interior design is elegant and sophisticated.",
    "The car's exterior styling is dynamic and attractive.",
    "The car's suspension absorbs bumps well.",
    "The car's cabin is spacious and comfortable.",
    "The car's steering is light and responsive.",
    "The car's visibility is excellent from all angles.",
    "The car's technology features are user-friendly.",
    "The car's fuel economy is efficient.",
    "The car's steering wheel feels comfortable in hand.",
    "The car's dashboard layout is intuitive and clear.",
    "The car's rear seats offer ample legroom.",
    "The car's trunk provides generous cargo space.",
    "The car's exterior finish is durable and glossy.",
    "The car's ride is smooth and composed.",
    "The car's acceleration is brisk and powerful.",
    "The car's handling is precise and nimble.",
    "The car's brakes are strong and responsive.",
    "The car's fuel efficiency is impressive for its class.",
    "The car's safety features are advanced and effective.",
    "The car's infotainment system is intuitive and responsive.",
    "The car's interior design is modern and attractive.",
    "The car's exterior design is sporty and eye-catching.",
    "The car's suspension provides a comfortable ride.",
    "The car's seats are supportive and comfortable.",
    "The car's cabin is spacious and well-appointed.",
    "The car's visibility is excellent from the driver's seat.",
    "The car's technology features enhance convenience and safety.",
    "The car's climate control system maintains a comfortable cabin temperature.",
    "The car's seats are ergonomically designed for long trips.",
    "The car's audio system delivers immersive sound quality.",
    "The car's interior is insulated from external noise.",
    "The car's transmission shifts seamlessly through gears.",
    "The car's exterior color is vibrant and eye-catching.",
    "The car's headlights illuminate the road ahead effectively.",
    "The car's touchscreen interface is intuitive to navigate."
]

def generate_car_review():
    phrase = random.choice(car_review_phrases)
    additional_comment = fake.sentence()
    return f"{phrase} {additional_comment}"

review_data = {
    'review_id': [str(uuid.uuid4())[:8] for _ in range(num_reviews)],
    'author': [random.choice(sample_users_table['user_id']) for _ in range(num_reviews)],  # FK to users table
    'listing_id': [random.choice(sample_listings_table['listing_id']) for _ in range(num_reviews)],  # FK to listings table
    'rating': [random.randint(1, 5) for _ in range(num_reviews)],
    'comments': [generate_car_review() for _ in range(num_reviews)]
}

sample_reviews_table = pd.DataFrame(review_data)
sample_reviews_table

Unnamed: 0,review_id,author,listing_id,rating,comments
0,b526d6e5,f97c209b,7303295709,3,Very reliable and fuel-efficient. Your recent ...
1,eb2d7e58,50d4e3b2,7308050880,1,The car's exterior design is sporty and eye-ca...
2,faffd8ef,f97c209b,7313086119,2,The car's infotainment system is intuitive and...
3,4e3263fd,6eb4d8fb,7311883855,5,I had a great buying experience and the car is...
4,82f24729,b1b071b2,7313273600,3,The adaptive cruise control works well. Chair ...
5,6456f5de,50d4e3b2,7303734519,3,The car's headlights illuminate the road ahead...
6,87a07c89,346050b0,7303236346,2,The car's acceleration is brisk and powerful. ...
7,99c9003b,f97c209b,7311883855,2,The car's technology enhances the driving expe...
8,0a2a1d76,25d1cb2b,7308683920,3,The car's visibility is excellent from all ang...
9,b7069b4a,32b272f2,7313086119,4,The rearview camera provides a clear view. Fac...


In [12]:
num_rentals = 20

rental_data = {
    'rental_id': [str(uuid.uuid4())[:8] for _ in range(num_rentals)],
    'listing_id': np.random.choice(sample_listings_table['listing_id'], size=num_rentals),
    'user_id': np.random.choice(sample_users_table['user_id'], size=num_rentals),
    'rent_date': [datetime.now() - timedelta(days=random.randint(1, 365)) for _ in range(num_rentals)],
    'return_date': [None] * num_rentals,
    'status': ['renting'] * num_rentals
}

# Process each rental
for i in range(num_rentals):
    rent_date = rental_data['rent_date'][i]
    # Generate return date between 1 and 50 days after rent date
    return_date = rent_date + timedelta(days=random.randint(1, 50))
    # Check if the return date is in the past
    if return_date <= datetime.now():
        rental_data['return_date'][i] = return_date.strftime('%Y-%m-%d %H:%M')
        rental_data['status'][i] = 'returned'
    else:
        rental_data['return_date'][i] = None

sample_rentals_table = pd.DataFrame(rental_data)

sample_rentals_table['rent_date'] = sample_rentals_table['rent_date'].apply(lambda x: x.strftime('%Y-%m-%d %H:%M'))
sample_rentals_table

Unnamed: 0,rental_id,listing_id,user_id,rent_date,return_date,status
0,ef8a1621,7313273600,90104220,2023-12-21 21:06,2023-12-23 21:06,returned
1,bc0dacb4,7314618037,25d1cb2b,2023-07-07 21:06,2023-07-20 21:06,returned
2,55f52ae1,7315807724,b1b071b2,2023-11-06 21:06,2023-12-24 21:06,returned
3,c39c3dab,7312960878,32b272f2,2024-06-19 21:06,,renting
4,94c987e4,7313086119,6eb4d8fb,2024-06-01 21:06,,renting
5,4dfbe603,7303236346,76397991,2024-01-18 21:06,2024-02-10 21:06,returned
6,1835e559,7311883855,b1b071b2,2024-01-17 21:06,2024-03-05 21:06,returned
7,56420ecb,7314936270,32b272f2,2024-02-23 21:06,2024-03-05 21:06,returned
8,dba672d8,7313394387,f97c209b,2024-02-25 21:06,2024-03-17 21:06,returned
9,828bd5d8,7310842213,f97c209b,2024-06-01 21:06,,renting


In [13]:
state_data = {
    'state_id': ['al', 'ak', 'az', 'ar', 'ca', 'co', 'ct', 'dc', 'de', 'fl', 'ga',
                   'hi', 'id', 'il', 'in', 'ia', 'ks', 'ky', 'la', 'me', 'md', 'ma',
                   'mi', 'mn', 'ms', 'mo', 'mt', 'nc', 'ne', 'nv', 'nj', 'nm', 'ny',
                   'nh', 'nd', 'oh', 'ok', 'or', 'pa', 'ri', 'sc', 'sd', 'tn', 'tx',
                   'ut', 'vt', 'va', 'wa', 'wv', 'wi', 'wy'],
    'state_name': ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'District of Columbia', 
                   'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 
                   'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 
                   'Montana', 'North Carolina', 'Nebraska', 'Nevada', 'New Jersey', 'New Mexico', 'New York', 'New Hampshire', 
                   'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 
                   'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'],
    'time_zone': ['Central', 'Alaska', 'Mountain', 'Central', 'Pacific', 'Mountain', 'Eastern', 'Eastern', 'Eastern', 'Eastern', 
                  'Eastern', 'Hawaii-Aleutian', 'Mountain', 'Central', 'Eastern', 'Central', 'Central', 'Eastern', 'Central', 
                  'Eastern', 'Eastern', 'Eastern', 'Eastern', 'Central', 'Central', 'Central', 'Mountain', 'Eastern', 'Central', 
                  'Pacific', 'Eastern', 'Mountain', 'Eastern', 'Eastern', 'Central', 'Eastern', 'Central', 'Pacific', 'Eastern', 
                  'Eastern', 'Eastern', 'Central', 'Central', 'Central', 'Mountain', 'Eastern', 'Eastern', 'Pacific', 'Eastern', 
                  'Central', 'Mountain']
}

sample_states_table = pd.DataFrame(state_data)
sample_states_table

Unnamed: 0,state_id,state_name,time_zone
0,al,Alabama,Central
1,ak,Alaska,Alaska
2,az,Arizona,Mountain
3,ar,Arkansas,Central
4,ca,California,Pacific
5,co,Colorado,Mountain
6,ct,Connecticut,Eastern
7,dc,District of Columbia,Eastern
8,de,Delaware,Eastern
9,fl,Florida,Eastern


In [14]:
num_favorites = 20
favorite_data = {
    'id': [random.choice(sample_listings_table['listing_id']) for _ in range(num_favorites)],
    'user_id': [random.choice(sample_users_table['user_id']) for _ in range(num_favorites)]
}

sample_favorites_table = pd.DataFrame(favorite_data)

sample_favorites_table

Unnamed: 0,id,user_id
0,7313273600,25d1cb2b
1,7308268142,90104220
2,7308268142,6eb4d8fb
3,7316951646,76397991
4,7311883855,25d1cb2b
5,7313273600,04bc9af6
6,7313394387,6eb4d8fb
7,7308683920,90104220
8,7312960878,f97c209b
9,7313086119,25d1cb2b


In [15]:
# save da files
sample_cars_table.to_csv('sample_cars_table.csv', index=False)
sample_users_table.to_csv('sample_users_table.csv', index=False)
sample_owners_table.to_csv('sample_owners_table.csv', index=False)
sample_rentals_table.to_csv('sample_rentals_table.csv', index=False)
sample_listings_table.to_csv('sample_listings_table.csv', index=False)
sample_favorites_table.to_csv('sample_favorites_table.csv', index=False)
sample_states_table.to_csv('sample_states_table.csv', index=False)
sample_reviews_table.to_csv('sample_reviews_table.csv', index=False)

### Data Summaries

In [28]:
lengths = []
for i in (df["url"] + df["image_url"]):
    lengths.append(len(str(i)))

max(lengths)

169

In [15]:
# lat and long
max_lat = max(df["lat"].unique())
min_lat = min(df["lat"].unique())

max_long = max(df["long"].unique())
min_long = min(df["long"].unique())

print("lat: [", min_lat, ",", max_lat, "] long: [", min_long, ",", max_long, "]")

lat: [ -84.122245 , 82.390818 ] long: [ -159.827728 , 173.885502 ]


In [19]:
df["state"].dropna().unique()

array(['al', 'ak', 'az', 'ar', 'ca', 'co', 'ct', 'dc', 'de', 'fl', 'ga',
       'hi', 'id', 'il', 'in', 'ia', 'ks', 'ky', 'la', 'me', 'md', 'ma',
       'mi', 'mn', 'ms', 'mo', 'mt', 'nc', 'ne', 'nv', 'nj', 'nm', 'ny',
       'nh', 'nd', 'oh', 'ok', 'or', 'pa', 'ri', 'sc', 'sd', 'tn', 'tx',
       'ut', 'vt', 'va', 'wa', 'wv', 'wi', 'wy'], dtype=object)

In [20]:
df["condition"].dropna().unique()

array(['good', 'excellent', 'fair', 'like new', 'new', 'salvage'],
      dtype=object)

In [23]:
user_favorites = favorites_table[favorites_table['userID'] == 'user_25764002']
user_favorites

Unnamed: 0,favoriteID,id,userID
2,ee48c532-74cb-441b-866e-bc80d93b2983,7305679700,user_25764002
62,8bc0a3ef-2b21-4166-a4f3-3c71340284a1,7315916207,user_25764002
432,70b71a2d-2668-4515-858b-43cb648357db,7316856596,user_25764002
519,f1b6b231-3410-4140-aad3-2087e9e2f1a9,7313105260,user_25764002


In [21]:
len(df['manufacturer'].unique())

42

In [19]:
df['model'].unique()

array(['sierra 1500 crew cab slt', 'silverado 1500',
       'silverado 1500 crew', ..., 'gand wagoneer', '96 Suburban',
       'Paige Glenbrook Touring'], dtype=object)

In [20]:
len(df['model'].unique())

29667