In [None]:
import pymongo
import os
from dotenv import load_dotenv
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import numpy as np


# Load variables from a local .env file into the process environment.
load_dotenv()

# Connection string handed to the MongoDB client; None when the variable is unset.
DATABASE_URL = os.environ.get('DATABASE_URL')
# Base address of a car sheet page; a car id is appended to build the full URL.
CAR_URL = "https://www.kudosprime.com/fh5/car_sheet.php?id="

In [None]:
# Open the MongoDB client and select the "Car" collection of the "fhta" database.
db_client = pymongo.MongoClient(host=DATABASE_URL)
db_col = db_client.get_database("fhta").get_collection("Car")

In [None]:
# Read the car list and switch the spreadsheet headers to the field names
# used in the rest of the notebook.
cars = pd.read_excel("fh5_cars_kudosprime.xlsx").rename(
    columns={
        "K' iD": "kId",
        "Group": "group",
        "Maker": "make",
        "Model": "model",
    }
)
cars

In [None]:
# Count the missing values in each column.
cars.isnull().sum()

In [None]:
# Keep only the rows that have no missing value in any column.
cars = cars.dropna()
cars

In [None]:
# Errors that mean an element is absent from the page or that its text
# cannot be parsed as a number.
_PARSE_ERRORS = (AttributeError, IndexError, TypeError, ValueError)


def _content_text(soup, tag, css_class, index=0):
    """Return the text of child ``index`` of the first ``tag`` with class ``css_class``."""
    return soup.find(tag, class_=css_class).contents[index].text


def _perf_float(perfs, index, cleanup=None):
    """Parse the text after the colon of ``perfs.contents[index]`` as a float.

    ``cleanup`` optionally transforms that text before conversion. Returns NaN
    when the entry is missing or cannot be parsed.
    """
    try:
        raw = perfs.contents[index].text.split(":")[1]
        return float(cleanup(raw) if cleanup else raw)
    except _PARSE_ERRORS:
        return np.nan


def scrapeData(kId, timeout=10):
    """Scrape the stock stats of one car from its car sheet page.

    Args:
        kId: Car id. It is appended to ``CAR_URL`` and used (zero-padded to
            three characters) in the image file name. An integral float such
            as ``12.0`` is rendered as ``12``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        dict with the keys ``kId``, ``year``, ``imageLink``,
        ``stockPerformancePoints``, ``maxPerformancePoints``,
        ``stockDrivetrain``, ``stockIngamePrice``, ``carSource``,
        ``stockSpeed``, ``stockHandling``, ``stockAcceleration``,
        ``stockLaunch``, ``stockBraking``, ``stockOffroad``,
        ``stockTopSpeedKmh``, ``stockLateralGs``, ``stock0To97``,
        ``stock0To161``, ``stockHorsepower`` and ``stockWeightLbs``.
        ``year`` is -1 when the page description has no "Year" entry.
        ``maxPerformancePoints`` and the four ``car_perfs`` values are NaN
        when they are missing or unparsable.

    Raises:
        requests.RequestException: The request failed, timed out or returned
            an HTTP error status.
        AttributeError, IndexError, ValueError: A required element is missing
            from the page or holds text that cannot be parsed.
    """
    # A float id (pandas upcasts a column that ever held NaN) would otherwise
    # be rendered as "12.0" in both the page URL and the image name.
    if isinstance(kId, float) and kId.is_integer():
        kid_text = str(int(kId))
    else:
        kid_text = str(kId)

    # The timeout keeps one stalled request from hanging the whole scrape.
    response = requests.get(CAR_URL + kid_text, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # year                      Int?
    # The meta description is a comma-separated list; the last "Year" entry wins.
    description = soup.find("meta", attrs={'name':'description'})
    year = -1
    for entry in description.attrs["content"].split(","):
        if "Year" in entry:
            year = int(entry.split(":")[1].strip())

    # imageLink                 String?
    imageLink = "https://www.kudosprime.com/fh5/images/cars/big/fh5_car_" + kid_text.zfill(3) + ".jpg?v=1"

    # stockPerformancePoints    Int?
    stockPerformancePoints = int(_content_text(soup, "span", "pimain", 1))

    # maxPerformancePoints      Int?
    try:
        maxPerformancePoints = int(_content_text(soup, "span", "pimax", 1))
    except _PARSE_ERRORS:
        maxPerformancePoints = np.nan

    # stockDrivetrain           String?
    stockDrivetrain = soup.find("span", class_="transmission").text

    # stockIngamePrice          Int?
    stockIngamePrice = int(_content_text(soup, "div", "price").replace(",", ""))

    # carSource                 String?
    carSource = _content_text(soup, "div", "car_source")

    # stockSpeed .. stockOffroad    Float?
    stockSpeed = float(_content_text(soup, "div", "speed"))
    stockHandling = float(_content_text(soup, "div", "handling"))
    stockAcceleration = float(_content_text(soup, "div", "acceleration"))
    stockLaunch = float(_content_text(soup, "div", "launch"))
    stockBraking = float(_content_text(soup, "div", "braking"))
    stockOffroad = float(_content_text(soup, "div", "offroad"))

    # The four optional values below live at the odd child positions of the
    # car_perfs block; a missing block simply yields NaN for each of them.
    perfs = soup.find("div", class_="car_perfs")
    # stockTopSpeed             Float?
    stockTopSpeedKmh = _perf_float(perfs, 1, lambda raw: raw.split(" ")[0])
    # stockLateralGs            Float?
    stockLateralGs = _perf_float(perfs, 3)
    # stock0To97                Float?
    stock0To97 = _perf_float(perfs, 5, lambda raw: raw.replace("s", ""))
    # stock0To161               Float?
    stock0To161 = _perf_float(perfs, 7, lambda raw: raw.replace("s", ""))

    # stockHorsepower           Int?
    stockHorsepower = int(_content_text(soup, "span", "power"))

    # stockWeight               Int?
    stockWeightLbs = int(_content_text(soup, "span", "weight"))

    # Key order is kept stable because it becomes the column order downstream.
    return {
        "kId": kId,
        "year": year,
        "imageLink": imageLink,
        "stockPerformancePoints": stockPerformancePoints,
        "maxPerformancePoints": maxPerformancePoints,
        "stockDrivetrain": stockDrivetrain,
        "stockIngamePrice": stockIngamePrice,
        "carSource": carSource,
        "stockSpeed": stockSpeed,
        "stockHandling": stockHandling,
        "stockAcceleration": stockAcceleration,
        "stockLaunch": stockLaunch,
        "stockBraking": stockBraking,
        "stockOffroad": stockOffroad,
        "stockTopSpeedKmh": stockTopSpeedKmh,
        "stockLateralGs": stockLateralGs,
        "stock0To97": stock0To97,
        "stock0To161": stock0To161,
        "stockHorsepower": stockHorsepower,
        "stockWeightLbs": stockWeightLbs,
    }

In [None]:
# Pause between two requests so the site is not queried in a tight loop.
REQUEST_DELAY_SECONDS = 0.2

data_list = []

# Scrape every car in turn. The counter (not the car id) is reported so the
# progress message states how many cars have been scraped so far.
for scraped_count, car_id in enumerate(cars.kId, start=1):
    data_list.append(scrapeData(car_id))
    time.sleep(REQUEST_DELAY_SECONDS)
    print("Scraped data of {} cars".format(scraped_count))

In [None]:
# Turn the list of scraped dicts into a table: one row per car.
df_scraped = pd.DataFrame(data=data_list)
df_scraped

In [None]:
# Attach the scraped stats to the spreadsheet rows by car id; the left join
# keeps every spreadsheet row even when no scraped row matches it.
cars_merged = pd.merge(cars, df_scraped, how="left", on="kId")
cars_merged

In [None]:
# Save the merged table to disk. With the default settings the row index is
# written as the first, unnamed column.
cars_merged.to_csv(path_or_buf="cars.csv")

In [None]:
# Convert every row to a plain dict for insertion. Missing values (NaN) are
# replaced with None so the database stores them as null instead of as a
# NaN number.
cars_dict = [
    {column: (None if pd.isna(value) else value) for column, value in record.items()}
    for record in cars_merged.to_dict("records")
]
cars_dict

In [None]:
# Write all car records to the collection in one ordered bulk insert.
db_col.insert_many(documents=cars_dict, ordered=True)