In [65]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time
from selenium import webdriver

### importing the list of all formula one races from wikipedia

In [104]:
url = "https://en.wikipedia.org/wiki/List_of_Formula_One_circuits"
page = requests.get(url)

In [105]:
soup = BeautifulSoup(page.text, "html")
# print(soup.prettify())

In [106]:
tables = soup.find_all("table")
# print(tables)

In [107]:
print(len(tables))

4


In [70]:
for table in tables:
    print(f"Table: <{table.name}>")
    print(f"ID: {table.get('id')}")
    print(f"Class: {table.get('class')}")

Table: <table>
ID: None
Class: ['sidebar', 'sidebar-collapse', 'nomobile', 'nowraplinks', 'plainlist']
Table: <table>
ID: None
Class: ['wikitable']
Table: <table>
ID: None
Class: ['wikitable', 'sortable']
Table: <table>
ID: None
Class: ['nowraplinks', 'mw-collapsible', 'autocollapse', 'navbox-inner']


In [71]:
# print(tables[2])

we got the table we were looking for

In [108]:
circuts = tables[2]
columns = circuts.find_all("th")
column_names = [title.text.strip() for title in columns]
print(column_names)

['Circuit', 'Map', 'Type', 'Direction', 'Location', 'Country', 'Last length used', 'Turns', 'Grands Prix', 'Season(s)', 'Grands Prix held']


we won't use "Map" column as it is an Image and it's useless for our purpose

In [73]:
column_names = [name for name in column_names if name != "Map"]

we will also extract Latitude and Longitude

In [74]:
column_names.extend(["Latitude", "Longitude"])

In [75]:
print(column_names)

['Circuit', 'Type', 'Direction', 'Location', 'Country', 'Last length used', 'Turns', 'Grands Prix', 'Season(s)', 'Grands Prix held', 'Latitude', 'Longitude']


we have the column names, let's move on to the data and put it in a dataframe

In [86]:
df = pd.DataFrame(columns = column_names)
df

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Grands Prix held,Latitude,Longitude


In [80]:
# function for extracting lat and long from a single string given in decimal values

def decimal_to_dms(decimal_str):
    import re

    def convert(value, direction):
        degrees = int(value)
        minutes_float = abs((value - degrees) * 60)
        minutes = int(minutes_float)
        seconds = round((minutes_float - minutes) * 60, 2)
        return f"{abs(degrees)}°{minutes}′{seconds}″{direction}"

    # a pattern of passed data
    match = re.match(r"([0-9.]+)°([NS])\s+([0-9.]+)°([EW])", decimal_str.strip())
    if not match:
        raise ValueError("wrong pattern of the data")

    lat_val = float(match.group(1))
    lat_dir = match.group(2)
    lon_val = float(match.group(3))
    lon_dir = match.group(4)

    lat_dms = convert(lat_val, lat_dir)
    lon_dms = convert(lon_val, lon_dir)

    return lat_dms, lon_dms

In [81]:
decimal_to_dms("32.78194°N 96.76556°W")

('32°46′54.98″N', '96°45′56.02″W')

In [182]:
# function for extracting coordinates for each circut from the link of an external corresponding circut page

def extract_coordinates(current_row):
    link_tag = current_row[0].find("a")
    if link_tag and "href" in link_tag.attrs:
        link = link_tag["href"]
        full_link = "https://en.wikipedia.org" + link 
        # print(full_link)
    else:
        print("error")
        
    driver = webdriver.Chrome()
    driver.get(full_link)
    # time.sleep(1)
    
    soup = BeautifulSoup(driver.page_source, "html.parser")
    table = soup.find("table", class_="infobox")

    # some coordinates are in the infobox and some outside it in a span with class geo-dec
    coordinates_row = table.find("th", string=lambda text: text and "Coordinates" in text)
    if coordinates_row:
        td = coordinates_row.find_next_sibling("td")
        latitude = td.find("span", class_="latitude")
        latitude = latitude.text
        longitude = td.find("span", class_="longitude")
        longitude = longitude.text
    else:
        geo_dec = soup.find("span", class_="geo-dec")
        latitude, longitude = decimal_to_dms(geo_dec.text)
        
    driver.quit()

    if latitude and longitude:
        return [latitude, longitude]
    else:
        return [0,0]
    
    

In [87]:
data = circuts.find_all("tr")
for row in data[1:]:
    current_row = row.find_all("td")
    coords = extract_coordinates(current_row)
    current_row_data = [current_row[k].text.strip() for k in range(len(current_row)) if k != 1] # omit image data from Map column
    current_row_data.extend(coords)
    print(current_row_data)
    df.loc[len(df)] = current_row_data

['Adelaide Street Circuit', 'Street circuit', 'Clockwise', 'Adelaide', 'Australia', '3.780\xa0km (2.349\xa0mi)', '16', 'Australian Grand Prix', '1985–1995', '11', '34°55′50″S', '138°37′14″E']
['Ain-Diab Circuit', 'Road circuit', 'Clockwise', 'Casablanca', 'Morocco', '7.618\xa0km (4.734\xa0mi)', '18', 'Moroccan Grand Prix', '1958', '1', '33°34′43″N', '7°41′15″W']
['Aintree Motor Racing Circuit', 'Road circuit', 'Clockwise', 'Aintree', 'United Kingdom', '4.828\xa0km (3.000\xa0mi)', '12', 'British Grand Prix', '1955, 1957, 1959, 1961–1962', '5', '53°28′37″N', '2°56′26″W']
['Albert Park Circuit *', 'Street circuit', 'Clockwise', 'Melbourne', 'Australia', '5.278\xa0km (3.280\xa0mi)', '16', 'Australian Grand Prix', '1996–2019, 2022–2025', '28', '37°50′59″S', '144°58′6″E']
['Algarve International Circuit', 'Race circuit', 'Clockwise', 'Portimão', 'Portugal', '4.653\xa0km (2.891\xa0mi)', '15', 'Portuguese Grand Prix', '2020–2021', '2', '37°13′55″N', '8°37′55″W']
['Autódromo do Estoril', 'Race 

In [92]:
df.loc[df["Latitude"] == "0"]

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Grands Prix held,Latitude,Longitude


In [132]:
df.head(10)

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Latitude,Longitude,Grands Prix held
0,Adelaide Street Circuit,Street circuit,Clockwise,Adelaide,Australia,3.780 km (2.349 mi),16,Australian Grand Prix,1985–1995,34°55′50″S,138°37′14″E,11
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618 km (4.734 mi),18,Moroccan Grand Prix,1958,33°34′43″N,7°41′15″W,1
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,5
3,Albert Park Circuit *,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",37°50′59″S,144°58′6″E,28
4,Algarve International Circuit,Race circuit,Clockwise,Portimão,Portugal,4.653 km (2.891 mi),15,Portuguese Grand Prix,2020–2021,37°13′55″N,8°37′55″W,2
5,Autódromo do Estoril,Race circuit,Clockwise,Estoril,Portugal,4.360 km (2.709 mi),13,Portuguese Grand Prix,1984–1996,38°45′3″N,9°23′39″W,13
6,Autódromo Hermanos Rodríguez *,Race circuit,Clockwise,Mexico City,Mexico,4.304 km (2.674 mi),17,"Mexican Grand Prix,Mexico City Grand Prix","1963–1970, 1986–1992, 2015–2019, 2021–2024",19°24′22″N,99°5′33″W,24
7,Autódromo Internacional do Rio de Janeiro,Race circuit,Anti-clockwise,Rio de Janeiro,Brazil,5.031 km (3.126 mi),11,Brazilian Grand Prix,"1978, 1981–1989",22°58′32″S,43°23′42″W,10
8,Autodromo Internazionale del Mugello,Race circuit,Clockwise,Scarperia e San Piero,Italy,5.245 km (3.259 mi),14,Tuscan Grand Prix,2020,43°59′51″N,11°22′19″E,1
9,Autodromo Internazionale Enzo e Dino Ferrari *,Race circuit,Anti-clockwise,Imola,Italy,4.909 km (3.050 mi),17,"Italian Grand Prix,San Marino Grand Prix,Emili...","1980–2006, 2020–2022, 2024–2025",44°20′28″N,11°42′48″E,32


In [131]:
df.to_csv("circuts.csv", index=False)

## transform the table to SCD2 type, by dividing 'Season(s)' column to: year_from, year_to, isActive 

In [125]:
df = pd.read_csv('circuts.csv')

In [181]:
# mistake in wikipedia !!!! -> is active from the table as the seasons date's not 
# always match (sometimes end date is 2024 even if circuit is active in 2025)
# but the star in the name says it correctly

def parse_seasons(seasons_str, isActive): 
    periods = []
    parts = [s.strip() for s in seasons_str.split(',')]
    for part in parts:
        if ']' in part: # sometimes the dates have references in wikipedia and we want to omit those
            part = part[0:-3]
            print(part)
        if '–' in part:
            start, end = part.split('–')
            if end == "2025":
                periods.append((int(start), int(end), True))
            elif end == "2024" and isActive:
                periods.append((int(start), 2025, True))
            else:
                periods.append((int(start), int(end), False))
        else:
            if part == "2025":
                periods.append((int(part), int(part), True))
            elif part == "2024" and isActive:
                periods.append((2025, 2025, True))
            else:
                periods.append((int(part), int(part), False))
    return periods

In [120]:
df.head(20)

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Latitude,Longitude,Grands Prix held
0,Adelaide Street Circuit,Street circuit,Clockwise,Adelaide,Australia,3.780 km (2.349 mi),16,Australian Grand Prix,1985–1995,34°55′50″S,138°37′14″E,16
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618 km (4.734 mi),18,Moroccan Grand Prix,1958,33°34′43″N,7°41′15″W,18
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,12
3,Albert Park Circuit *,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",37°50′59″S,144°58′6″E,16
4,Algarve International Circuit,Race circuit,Clockwise,Portimão,Portugal,4.653 km (2.891 mi),15,Portuguese Grand Prix,2020–2021,37°13′55″N,8°37′55″W,15
5,Autódromo do Estoril,Race circuit,Clockwise,Estoril,Portugal,4.360 km (2.709 mi),13,Portuguese Grand Prix,1984–1996,38°45′3″N,9°23′39″W,13
6,Autódromo Hermanos Rodríguez *,Race circuit,Clockwise,Mexico City,Mexico,4.304 km (2.674 mi),17,"Mexican Grand Prix,Mexico City Grand Prix","1963–1970, 1986–1992, 2015–2019, 2021–2024",19°24′22″N,99°5′33″W,17
7,Autódromo Internacional do Rio de Janeiro,Race circuit,Anti-clockwise,Rio de Janeiro,Brazil,5.031 km (3.126 mi),11,Brazilian Grand Prix,"1978, 1981–1989",22°58′32″S,43°23′42″W,11
8,Autodromo Internazionale del Mugello,Race circuit,Clockwise,Scarperia e San Piero,Italy,5.245 km (3.259 mi),14,Tuscan Grand Prix,2020,43°59′51″N,11°22′19″E,14
9,Autodromo Internazionale Enzo e Dino Ferrari *,Race circuit,Anti-clockwise,Imola,Italy,4.909 km (3.050 mi),17,"Italian Grand Prix,San Marino Grand Prix,Emili...","1980–2006, 2020–2022, 2024–2025",44°20′28″N,11°42′48″E,17


In [133]:
print(df.loc[3, "Season(s)"])
print(parse_seasons(df.loc[3, "Season(s)"], True))

1996–2019, 2022–2025
[(1996, 2019, False), (2022, 2025, True)]


In [99]:
def build_new_rows(row):
    periods_str = row['Season(s)']
    isActive = True if '*' in row['Circuit'] else False
    result = []
    periods = parse_seasons(periods_str, isActive)

    # sort by start year
    periods.sort()
    
    result = []
    
    for start, end, active in periods:
        new_row = row.to_dict()
        new_row['from'] = start
        new_row['to'] = end
        new_row['isActivePeriod'] = active
        result.append(new_row)
    return result

In [134]:
transformed_df = []
for _, row in df.iterrows():
    transformed_df.extend(build_new_rows(row))
    print(len(transformed_df))

scd2_df = pd.DataFrame(transformed_df)

1
2
6
8
9
10
14
16
17
20
24
26
31
32
2012–2025
34
36
46
47
48
49
52
53
54
57
58
60
66
74
79
83
85
87
96
101
104
106
107
110
116
117
118
119
121
130
131
133
135
136
137
140
141
142
144
146
147
151
155
157
167
168
169
171
2014–2024
174
175
180
181
182
183
185
1987–2024
202
203
206
207
208
209
210
211


In [138]:
scd2_df.head(15)

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Latitude,Longitude,Grands Prix held,from,to,isActivePeriod
0,Adelaide Street Circuit,Street circuit,Clockwise,Adelaide,Australia,3.780 km (2.349 mi),16,Australian Grand Prix,1985–1995,34°55′50″S,138°37′14″E,11,1985,1995,False
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618 km (4.734 mi),18,Moroccan Grand Prix,1958,33°34′43″N,7°41′15″W,1,1958,1958,False
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,5,1955,1955,False
3,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,5,1957,1957,False
4,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,5,1959,1959,False
5,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,5,1961,1962,False
6,Albert Park Circuit *,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",37°50′59″S,144°58′6″E,28,1996,2019,False
7,Albert Park Circuit *,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",37°50′59″S,144°58′6″E,28,2022,2025,True
8,Algarve International Circuit,Race circuit,Clockwise,Portimão,Portugal,4.653 km (2.891 mi),15,Portuguese Grand Prix,2020–2021,37°13′55″N,8°37′55″W,2,2020,2021,False
9,Autódromo do Estoril,Race circuit,Clockwise,Estoril,Portugal,4.360 km (2.709 mi),13,Portuguese Grand Prix,1984–1996,38°45′3″N,9°23′39″W,13,1984,1996,False


In [139]:
# scd2_df['from'] = scd2_df['from'].astype(int)
# scd2_df['to'] = scd2_df['to'].astype(int)
# scd2_df['Turns'] = scd2_df['Turns'].astype(int)
# scd2_df['Grands Prix held'] = scd2_df['Grands Prix held'].astype(int)


check with data from internet if it's true

In [179]:
print(len(scd2_df[(scd2_df['from'] <= 2014) & (scd2_df['to'] >= 2014)]))
scd2_df[(scd2_df['from'] <= 2014) & (scd2_df['to'] >= 2014)]

19


Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Latitude,Longitude,Grands Prix held,from,to,isActivePeriod
6,Albert Park Circuit,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",-37.8497,144.9683,28,1996,2019,False
22,Autodromo José Carlos Pace,Race circuit,Anti-clockwise,São Paulo,Brazil,4.309 km (2.677 mi),15,"Brazilian Grand Prix,São Paulo Grand Prix","1973–1977, 1979–1980, 1990–2019, 2021–2024",-23.7011,-46.6972,41,1990,2019,False
25,Autodromo Nazionale di Monza,Race circuit,Clockwise,Monza,Italy,5.793 km (3.600 mi),11,Italian Grand Prix,"1950–1979, 1981–2024",45.6206,9.2894,74,1981,2025,True
33,Bahrain International Circuit,Race circuit,Clockwise,Sakhir,Bahrain,5.412 km (3.363 mi),15,"Bahrain Grand Prix,Sakhir Grand Prix","2004–2010, 2012–2025[a]",26.0325,50.5106,22,2012,2025,True
53,Circuit de Barcelona-Catalunya,Race circuit,Clockwise,Montmeló,Spain,4.657 km (2.894 mi),14,Spanish Grand Prix,1991–2025,41.57,2.2611,35,1991,2025,True
55,Circuit de Monaco,Street circuit,Clockwise,Monte Carlo,Monaco,3.337 km (2.074 mi),19,Monaco Grand Prix,"1950, 1955–2019, 2021–2025",43.7347,7.4206,71,1955,2019,False
73,Circuit de Spa-Francorchamps,Race circuit,Clockwise,Stavelot,Belgium,7.004 km (4.352 mi),20,Belgian Grand Prix,"1950–1956, 1958, 1960–1968, 1970, 1983, 1985–2...",50.4372,5.9714,57,2007,2025,True
81,Circuit Gilles-Villeneuve,Street circuit,Clockwise,Montreal,Canada,4.361 km (2.710 mi),13,Canadian Grand Prix,"1978–1986, 1988–2008, 2010–2019, 2022–2024",45.5006,-73.5225,43,2010,2019,False
85,Circuit of the Americas,Race circuit,Anti-clockwise,Austin,United States,5.513 km (3.426 mi),20,United States Grand Prix,"2012–2019, 2021–2024",30.1328,-97.6411,12,2012,2019,False
127,Hockenheimring,Race circuit,Clockwise,Hockenheim,Germany,4.574 km (2.842 mi),16,German Grand Prix,"1970, 1977–1984, 1986–2006, 2008, 2010, 2012, ...",49.3278,8.5658,37,2014,2014,False


In [146]:
scd2_df[scd2_df['Type'] == "Road circuit"]

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Latitude,Longitude,Grands Prix held,from,to,isActivePeriod
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618 km (4.734 mi),18,Moroccan Grand Prix,1958,33°34′43″N,7°41′15″W,1,1958,1958,False
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,5,1955,1955,False
3,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,5,1957,1957,False
4,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,5,1959,1959,False
5,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53°28′37″N,2°56′26″W,5,1961,1962,False
31,AVUS,Road circuit,Anti-clockwise,Berlin,Germany,8.300 km (5.157 mi),4,German Grand Prix,1959,52°28′50″N,13°15′05″E,1,1959,1959,False
49,Charade Circuit,Road circuit,Clockwise,Saint-Genès-Champanelle,France,8.055 km (5.005 mi),38,French Grand Prix,"1965, 1969–1970, 1972",45°44′50″N,3°02′20″E,4,1965,1965,False
50,Charade Circuit,Road circuit,Clockwise,Saint-Genès-Champanelle,France,8.055 km (5.005 mi),38,French Grand Prix,"1965, 1969–1970, 1972",45°44′50″N,3°02′20″E,4,1969,1970,False
51,Charade Circuit,Road circuit,Clockwise,Saint-Genès-Champanelle,France,8.055 km (5.005 mi),38,French Grand Prix,"1965, 1969–1970, 1972",45°44′50″N,3°02′20″E,4,1972,1972,False
52,Circuit Bremgarten,Road circuit,Clockwise,Bern,Switzerland,7.208 km (4.479 mi),14,Swiss Grand Prix,1950–1954,46°57′00″N,7°24′39″E,5,1950,1954,False


In [156]:
# function to convert coords from dms to decimals

def dms_to_decimal(dms_str):
    import re

    # Umożliwia: 1°17′29.51″N lub 103°51′49.86″E itp.
    pattern = r"(\d+)°(\d+)′([\d.]+)″([NSEW])"
    match = re.match(pattern, dms_str.strip())
    if not match:
        raise ValueError(f"Niepoprawny format: {dms_str}")

    degrees, minutes, seconds, direction = match.groups()
    degrees = int(degrees)
    minutes = int(minutes)
    seconds = float(seconds)  # <- uwzględnia ułamki

    decimal = degrees + minutes / 60 + seconds / 3600

    if direction in ['S', 'W']:
        decimal = -decimal

    return round(decimal,4)

In [158]:
scd2_df['Latitude'] = scd2_df['Latitude'].apply(dms_to_decimal)
scd2_df['Longitude'] = scd2_df['Longitude'].apply(dms_to_decimal)


ValueError: Niepoprawny format: 47°56′N

In [159]:
scd2_df[scd2_df['Latitude'] == "47°56′N"]

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Latitude,Longitude,Grands Prix held,from,to,isActivePeriod
47,Bugatti Au Mans,Race circuit,Clockwise,Le Mans,France,4.430 km (2.753 mi),8,French Grand Prix,1967,47°56′N,0°14′E,1,1967,1967,False


In [173]:
# new function as pattern didnt always match

def dms_to_decimal(dms_str):
    import re

    dms_str = dms_str.strip()

    # (np. 47°56′12″N)
    full_match = re.match(r"(\d+)°(\d+)′([\d.]+)″([NSEW])", dms_str)
    # (np. 47°56′N)
    no_seconds_match = re.match(r"(\d+)°(\d+)′([NSEW])", dms_str)

    if full_match:
        degrees, minutes, seconds, direction = full_match.groups()
        degrees = int(degrees)
        minutes = int(minutes)
        seconds = float(seconds)
    elif no_seconds_match:
        degrees, minutes, direction = no_seconds_match.groups()
        degrees = int(degrees)
        minutes = int(minutes)
        seconds = 0.0
    else:
        raise ValueError(f"Niepoprawny format: {dms_str}")

    decimal = degrees + minutes / 60 + seconds / 3600

    if direction in ['S', 'W']:
        decimal = -decimal

    return round(decimal, 4)

In [164]:
scd2_df['Latitude'] = scd2_df['Latitude'].apply(dms_to_decimal)
scd2_df['Longitude'] = scd2_df['Longitude'].apply(dms_to_decimal)

In [169]:
scd2_df['Circuit'] = scd2_df['Circuit'].str.replace(r'\*$', '', regex=True)

In [170]:
scd2_df.head(15)

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Latitude,Longitude,Grands Prix held,from,to,isActivePeriod
0,Adelaide Street Circuit,Street circuit,Clockwise,Adelaide,Australia,3.780 km (2.349 mi),16,Australian Grand Prix,1985–1995,-34.9306,138.6206,11,1985,1995,False
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618 km (4.734 mi),18,Moroccan Grand Prix,1958,33.5786,-7.6875,1,1958,1958,False
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1955,1955,False
3,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1957,1957,False
4,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1959,1959,False
5,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1961,1962,False
6,Albert Park Circuit,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",-37.8497,144.9683,28,1996,2019,False
7,Albert Park Circuit,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",-37.8497,144.9683,28,2022,2025,True
8,Algarve International Circuit,Race circuit,Clockwise,Portimão,Portugal,4.653 km (2.891 mi),15,Portuguese Grand Prix,2020–2021,37.2319,-8.6319,2,2020,2021,False
9,Autódromo do Estoril,Race circuit,Clockwise,Estoril,Portugal,4.360 km (2.709 mi),13,Portuguese Grand Prix,1984–1996,38.7508,-9.3942,13,1984,1996,False


In [171]:
scd2_df['Direction'].value_counts()

Direction
Clockwise                                                172
Anti-clockwise                                            36
Part clockwise and part anti-clockwise (figure eight)      3
Name: count, dtype: int64

In [174]:
scd2_df.to_csv("circuts_scd2.csv", index=False)

### artificially generating circut id

In [184]:
df_scd = pd.read_csv("circuts_scd2.csv")

In [185]:
df_scd.head(10)

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Latitude,Longitude,Grands Prix held,from,to,isActivePeriod
0,Adelaide Street Circuit,Street circuit,Clockwise,Adelaide,Australia,3.780 km (2.349 mi),16,Australian Grand Prix,1985–1995,-34.9306,138.6206,11,1985,1995,False
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618 km (4.734 mi),18,Moroccan Grand Prix,1958,33.5786,-7.6875,1,1958,1958,False
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1955,1955,False
3,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1957,1957,False
4,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1959,1959,False
5,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1961,1962,False
6,Albert Park Circuit,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",-37.8497,144.9683,28,1996,2019,False
7,Albert Park Circuit,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",-37.8497,144.9683,28,2022,2025,True
8,Algarve International Circuit,Race circuit,Clockwise,Portimão,Portugal,4.653 km (2.891 mi),15,Portuguese Grand Prix,2020–2021,37.2319,-8.6319,2,2020,2021,False
9,Autódromo do Estoril,Race circuit,Clockwise,Estoril,Portugal,4.360 km (2.709 mi),13,Portuguese Grand Prix,1984–1996,38.7508,-9.3942,13,1984,1996,False


In [193]:
import random

In [190]:
len(df_scd['Circuit'].unique())

77

In [199]:
def generate_key(name):
    random.seed(name)
    key = random.randint(10000,99999)
    return key

In [200]:
df_scd["CircuitLocationID"] = df_scd["Circuit"].apply(generate_key)

In [201]:
df_scd.head(10)

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Grands Prix,Season(s),Latitude,Longitude,Grands Prix held,from,to,isActivePeriod,CircuitLocationID
0,Adelaide Street Circuit,Street circuit,Clockwise,Adelaide,Australia,3.780 km (2.349 mi),16,Australian Grand Prix,1985–1995,-34.9306,138.6206,11,1985,1995,False,46946
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618 km (4.734 mi),18,Moroccan Grand Prix,1958,33.5786,-7.6875,1,1958,1958,False,20953
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1955,1955,False,26284
3,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1957,1957,False,26284
4,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1959,1959,False,26284
5,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,British Grand Prix,"1955, 1957, 1959, 1961–1962",53.4769,-2.9406,5,1961,1962,False,26284
6,Albert Park Circuit,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",-37.8497,144.9683,28,1996,2019,False,73060
7,Albert Park Circuit,Street circuit,Clockwise,Melbourne,Australia,5.278 km (3.280 mi),16,Australian Grand Prix,"1996–2019, 2022–2025",-37.8497,144.9683,28,2022,2025,True,73060
8,Algarve International Circuit,Race circuit,Clockwise,Portimão,Portugal,4.653 km (2.891 mi),15,Portuguese Grand Prix,2020–2021,37.2319,-8.6319,2,2020,2021,False,68059
9,Autódromo do Estoril,Race circuit,Clockwise,Estoril,Portugal,4.360 km (2.709 mi),13,Portuguese Grand Prix,1984–1996,38.7508,-9.3942,13,1984,1996,False,60169


check if no key is duplicated

In [202]:
len(df_scd["CircuitLocationID"].unique())

77

we are okay

In [203]:
df_scd.to_csv("circuts_scd2.csv", index=False)


In [211]:
df_scd = df_scd.drop("Season(s)", axis='columns')

In [213]:
df_scd = df_scd.drop("Grands Prix", axis='columns')

KeyError: "['Grands Prix'] not found in axis"

In [214]:
df_scd.head()

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Latitude,Longitude,Grands Prix held,from,to,isActivePeriod,CircuitLocationID
0,Adelaide Street Circuit,Street circuit,Clockwise,Adelaide,Australia,3.780 km (2.349 mi),16,-34.9306,138.6206,11,1985,1995,False,46946
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618 km (4.734 mi),18,33.5786,-7.6875,1,1958,1958,False,20953
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,53.4769,-2.9406,5,1955,1955,False,26284
3,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,53.4769,-2.9406,5,1957,1957,False,26284
4,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828 km (3.000 mi),12,53.4769,-2.9406,5,1959,1959,False,26284


In [215]:
df_scd["Circuit"] = df_scd["Circuit"].astype("str")

In [223]:
df_scd["Circuit"] = df_scd["Circuit"].str.strip()

In [227]:
df_scd[df_scd["Circuit"] == "Suzuka International Racing Course"]

Unnamed: 0,Circuit,Type,Direction,Location,Country,Last length used,Turns,Latitude,Longitude,Grands Prix held,from,to,isActivePeriod,CircuitLocationID
203,Suzuka International Racing Course,Race circuit,Part clockwise and part anti-clockwise (figure...,Suzuka,Japan,5.807 km (3.608 mi),18,34.8417,136.5389,35,1987,2006,False,60239
204,Suzuka International Racing Course,Race circuit,Part clockwise and part anti-clockwise (figure...,Suzuka,Japan,5.807 km (3.608 mi),18,34.8417,136.5389,35,2009,2019,False,60239
205,Suzuka International Racing Course,Race circuit,Part clockwise and part anti-clockwise (figure...,Suzuka,Japan,5.807 km (3.608 mi),18,34.8417,136.5389,35,2022,2025,True,60239


In [228]:
df_scd.to_csv("circuts_scd2.csv", index=False)

In [229]:
df_scd["isActivePeriod"] = df_scd["isActivePeriod"].astype(str)

In [231]:
df_scd.dtypes

Circuit               object
Type                  object
Direction             object
Location              object
Country               object
Last length used      object
Turns                 object
Latitude             float64
Longitude            float64
Grands Prix held       int64
from                   int64
to                     int64
isActivePeriod        object
CircuitLocationID      int64
dtype: object

In [232]:
df_scd.to_csv("circuts_scd2.csv", index=False)