Todo:
- délka okruhů všude (Indy)
- japonské GPs 70s
- Drivers competing at the Indianapolis 500 in 1950 through 1960 were credited with participation in and earned points towards the World Championship of Drivers.[17] However, the machines competing at Indianapolis were not necessarily run to Formula One specifications and regulations.[18] The drivers also earned separate points (on a different scale) towards the respective AAA or USAC national championships. No points, however, were awarded by the FIA towards the World Constructors' Championship.


In [684]:
import pandas as pd
import polars as pl
import altair as alt
pl.Config(tbl_rows=50)

<polars.config.Config at 0x21daf9cd820>

In [685]:
df = pd.read_json("data/ergast-results.json")
df = pl.from_pandas(df)

In [686]:
races = pd.read_csv("data_raw/ergast/races.csv")
races = pl.from_pandas(races)

In [687]:
circuits = pd.read_csv("data_raw/ergast/circuits.csv")
circuits = pl.from_pandas(circuits)

## Kalendář

In [689]:
from calendar import monthrange

In [690]:
def monthday(date_value):
    # Here date_value is a datetime.datetime object, not a Polars column
    month_length = monthrange(date_value.year, date_value.month)[1]
    return date_value.month + (date_value.day / month_length)

In [691]:
df = df.with_columns(pl.col("date").dt.ordinal_day().alias("day"))
df = df.with_columns(pl.col("date").dt.week().alias("week"))
df = df.with_columns(pl.col("date").map_elements(monthday, return_dtype=float).alias("monthday"))

asia = ['UAE','India','Bahrain','Malaysia', 'Qatar', 'China','Japan','Singapore',
             'Saudi Arabia','Korea']
namerica = ['USA','Canada','Mexico','United States']
samerica = ['Brazil', 'Argentina']
weurope = ['Netherlands','Portugal','Germany','UK','France','Belgium','Sweden','Switzerland','Italy','Spain','Monaco','Austria']
eeurope = ['Russia','Hungary','Azerbaijan','Turkey']
oceania = ['Australia']
africa = ['South Africa','Morocco']

street = ['Nevada','Las Vegas','Jeddah','Dallas','Montreal','Detroit','Phoenix','Monte-Carlo','Oporto','Adelaide','Baku','Marina Bay','Valencia','Miami']

nightIds = df.filter(pl.col("location").is_in(['Las Vegas','Jeddah','Marina Bay','Abu Dhabi','Al Daayen']) | ((pl.col('country') == 'Bahrain') & (pl.col('year') >= 2014))).select(pl.col('raceId')).to_series().to_list()

def get_continent(country): ## this is the ugliest thing ever writtern but I'm being sleepy and curious at the same time
    if country in asia:
        return "Asie"
    if country in namerica:
        return "S. Amerika"
    if country in samerica:
        return "J. Amerika"
    if country in weurope:
        return "záp. Evropa"
    if country in eeurope:
        return "vých. Evropa"
    if country in oceania:
        return "Austrálie"
    if country in africa:
        return "Afrika"

def get_type(location):
    if location in street:
        return "dočasný okruh"
    else:
        return "stálý okruh"

def get_light(id):
    if id in nightIds:
        return "umělé osvětlení"
    else:
        return "denní závod"

df = df.with_columns(pl.col('country').map_elements(get_continent, return_dtype=str).alias('region'))
df = df.with_columns(pl.col('location').map_elements(get_type, return_dtype=str).alias('type'))
df = df.with_columns(pl.col('raceId').map_elements(get_light, return_dtype=str).alias('osvětlení'))

In [692]:
def lap_length(course_description: str) -> float: 
    return float(course_description.split(' ')[0].split('[')[0].strip())

In [693]:
gps_wiki = pl.read_parquet('data/gps_wikipedia.parquet')

gps_wiki = gps_wiki.with_columns(pl.col('Course').map_elements(lap_length, return_dtype=float).alias('Lap_length')).with_columns(
    (pl.col('Lap_length') / pl.col('Fastest_lap') * 3600).alias('Fastest_lap_speed')
)

In [694]:
gps_wiki.columns

['raceId',
 'Date',
 'Official name',
 'Distance',
 'Scheduled distance',
 'Course',
 'Weather',
 'Attendance',
 'Laps',
 'Kms',
 'Fastest_lap',
 'Safety_car',
 'Virtual_safety_car',
 'Lap_length',
 'Fastest_lap_speed']

In [695]:
championship_races = df.group_by("raceId").agg(pl.col(["date","year","week","day","monthday","region","type","osvětlení","location"]).min()).join(
    df.group_by("raceId").agg(pl.col('constructor').unique().len().alias('constructors')), how='left', on='raceId').join(
    df.group_by("raceId").agg(pl.col('driverId').unique().len().alias('drivers')), how='left', on='raceId').join(
    gps_wiki, on='raceId', how='left'
    ).with_columns(
    (pl.col("Kms") / pl.col("Laps")).alias("Course lenght")
    ).with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    ).filter(~pl.col('location').is_null()).sort(by="date")

championship_races

umele = "permanentní"
mestske = "městská"
silnicni = "silniční"

def circuit_type(description):
    silnice = ["public","temporary road",'temporary street/road circuit']
    mesto = ["street",'purpose-built temporary circuit','albert park']
    okruh = ['permanent','grand prix circuit','yas marina','race track','korea','former military airbase','reims-gueux','airport facility']
    for x in silnice:
        if x in description.lower():
            return silnicni
    for x in mesto:
        if x in description.lower():
            return mestske
    for x in okruh:
        if x in description.lower():
            return umele
    print(f"Nerozumím: {description}")

championship_races = championship_races.with_columns(pl.col("Course").map_elements(circuit_type,return_dtype=str).alias('typ trati')).with_columns(pl.col('typ trati').fill_null(umele))
championship_races = championship_races.with_columns(
    pl.when(
        pl.col("location") == "Phoenix"
    ).then(
        pl.lit(mestske)
    ).when(
        pl.col('location') == 'Clermont-Ferrand'
    ).then(
        pl.lit(silnicni)
    ).otherwise(
        pl.col("typ trati")
    ).alias("typ trati")
)

def delkova_kategorie(kilometraz):
    if kilometraz <= 5:
        return "< 5 km"
    elif kilometraz <= 10:
        return "5-10 km"
    else:
        return "> 10 km"

championship_races = championship_races.with_columns(pl.col("Course lenght").map_elements(delkova_kategorie, return_dtype=str).alias("délka okruhu"))

def casova_kategorie(nejkolo):
    if nejkolo <= 90:
        return "< 1:30"
    elif nejkolo <= 120:
        return "1:30-2:00"
    elif nejkolo <= 300:
        return "2:00-5:00"
    elif nejkolo > 300:
        return "> 5:00"

championship_races = championship_races.with_columns(pl.col("Fastest_lap").map_elements(casova_kategorie, return_dtype=str).alias("nejrychlejší kolo"))
championship_races = championship_races.with_columns(pl.col("nejrychlejší kolo").fill_null("< 1:30"))

In [696]:
championship_races.filter(pl.col("Course").str.contains("(?i)street")).group_by("location").len().sort(by="len",descending=True)

location,len
str,u32
"""Monte-Carlo""",70
"""Montreal""",39
"""Melbourne""",26
"""Marina Bay""",15
"""Adelaide""",11
"""California""",8
"""Baku""",8
"""Detroit""",7
"""Barcelona""",6
"""Bern""",5


In [697]:
championship_races.filter(pl.col("Course").str.contains("(?i)permanent")).group_by("location").len().sort(by="len",descending=True)

location,len
str,u32
"""Monza""",74
"""Silverstone""",59
"""Spa""",50
"""São Paulo""",41
"""Nürburg""",41
"""Budapest""",39
"""Spielberg""",38
"""Hockenheim""",37
"""Montmeló""",34
"""Zandvoort""",34


In [698]:
championship_races.filter(pl.col("délka okruhu").is_null())

raceId,date,year,week,day,monthday,region,type,osvětlení,location,constructors,drivers,Date,Official name,Distance,Scheduled distance,Course,Weather,Attendance,Laps,Kms,Fastest_lap,Safety_car,Virtual_safety_car,Lap_length,Fastest_lap_speed,Course lenght,decade,typ trati,délka okruhu,nejrychlejší kolo
f64,datetime[ns],f64,i8,i16,f64,str,str,str,str,u32,u32,str,str,str,str,str,str,str,f64,f64,f64,bool,bool,f64,f64,f64,str,str,str,str
761.0,1959-08-02 00:00:00,1959.0,31,214,8.064516,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Berlin""",5,15,"""2 August 1959""","""XXI Grosser Preis von Deutschl…","""2x30 laps, 498.00 km (309.42 m…",,"""Public road/Permanent racing f…","""Dry and dull.""",,,,124.5,,,,,,"""195""","""silniční""",,"""2:00-5:00"""
594.0,1974-05-12 00:00:00,1974.0,19,132,5.387097,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Brussels""",15,32,"""12 May 1974""",,,,"""Permanent racing facility""",,,,,71.31,,,,,,"""197""","""permanentní""",,"""< 1:30"""
597.0,1974-06-23 00:00:00,1974.0,25,174,6.766667,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Zandvoort""",14,27,"""23 June 1974""","""XXI Grand Prix Zandvoort""",,,"""Permanent racing facility""",,,,,81.44,,,,,,"""197""","""permanentní""",,"""< 1:30"""
601.0,1974-08-18 00:00:00,1974.0,33,230,8.580645,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Spielberg""",15,31,"""18 August 1974""","""XII Memphis Grand Prix von Öst…",,,"""Permanent racing facility""",,,,,97.22,,,,,,"""197""","""permanentní""",,"""1:30-2:00"""
445.0,1984-07-22 00:00:00,1984.0,29,204,7.709677,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Kent""",15,28,"""22 July 1984""","""XXXVII John Player British Gra…","""71 (aggregated: 11 + 60) laps,…","""75 laps, 315.450[4] or 315.457…","""Permanent racing facility""",,,,,73.191,,,,,,"""198""","""permanentní""",,"""< 1:30"""
221.0,1997-09-28 00:00:00,1997.0,39,271,9.933333,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Nürburg""",11,22,"""28 September 1997""","""Grosser Preis von Luxemburg 19…","""67 laps, between 305.233 and 3…",,"""Permanent racing facility""","""Partially cloudy, mild and dry""",,,,78.805,,,,,,"""199""","""permanentní""",,"""< 1:30"""
20.0,2008-04-06 00:00:00,2008.0,14,97,4.2,"""Asie""","""stálý okruh""","""denní závod""","""Sakhir""",11,22,"""6 April 2008""","""2008 Formula 1 Gulf Air Bahrai…","""57[2] laps, 308.238 km (191.53…",,"""Permanent racing facility""","""Dry""","""100,000 (Weekend)[3]""",,,93.193,,,,,,"""200""","""permanentní""",,"""1:30-2:00"""
34.0,2008-10-19 00:00:00,2008.0,42,293,10.612903,"""Asie""","""stálý okruh""","""denní závod""","""Shanghai""",10,20,"""19 October 2008""","""2008 Formula 1 Sinopec Chinese…","""56[2] laps, 305.066 km (189.55…",,"""Permanent Racing Facility""","""Dry""",,,,96.325,,,,,,"""200""","""permanentní""",,"""1:30-2:00"""
1063.0,2021-08-29 00:00:00,2021.0,34,241,8.935484,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Spa""",10,20,"""29 August 2021""","""Formula 1 Rolex Belgian Grand …","""1 lap, 6.880 km (4.275 miles)""","""44 laps, 308.052 km (191.415 m…","""Permanent racing facility""","""Wet; persistent rain""","""213,000[2]""",,,,True,True,,,,"""202""","""permanentní""",,"""< 1:30"""


In [699]:
from src.kristi_promin import kristi_promin
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')

ThemeRegistry.enable('irozhlas')

In [700]:
gps_vysvetlivky = {
    '195':['1950-51 | Mistrovství světa začíná jako ryze evropský podnik, soustředěný do',
           'přelomu jara a léta. Oficiálně se do něj řadí i 500 mil Indianapolis, termínově', 
           'ale koliduje s evropskými závody a jezdci přelétají jen výjimečně.'],
    '196':['1960-61 | Pevnou součástí kalendáře se stává lednový závod v Jihoafrické',
           'republice a podzimní Velké ceny ve Spojených státech a Kanadě.',
          'Mistrovství světa začíná naplňovat svůj název.'],
    '197':['1970-79 | Volání jezdců po větší bezpečnosti vyústí v konec posledních',
          'tratí vedených po silnicích: Spa-Francorchamps (1970) a Charade (1972).',
          'Po vážné nehodě Nikiho Laudy končí i Nürburgring (1976), s téměř 23 km',
          'nejdelší okruh šampionátu. Dál už se jezdí na tratích podobných dnešním.'],
    '198':['1980-89 | Závody Formule 1 hledají americké publikum na městských tratích',
          'v Detroitu, Dallasu a Palm Beach. V roce 1986 se poprvé jede ve východním',
          'bloku, na Hungaroringu poblíž Budapešti. O rok později se F1 natrvalo vrací',
          'do Japonska. Kalendář přestává být roztahaný po celém roce, etabluje se',
          'dnešní formát od března do podzimu.'],
    '199':["1990-99 | Klidná dekáda. Velká cena Austrálie se přesouvá z konce sezony",
          "na její začátek a F1 poprvé proniká do kontinentální Asie – do Malajsie."],
    '200':['2000-09 | Přibývá závodů v Asii a na Blízkém východě. V Singapuru se',
          'v roce 2008 poprvé jede v noci pod umělým osvětlením, o rok později seriál',
          'poprvé končí v Abú Zabí, kde se závodí za setmění. V kalendáři se začíná',
          'zřetelně rýsovat srpnová pauza, umožňující lidem okolo F1 strávit při',
          'narůstajícím množství Velkých cen alespoň nějaký čas s rodinami.'],
    '201':['2010-19 | Začátek velkého návratu F1 do Severní Ameriky: do USA, Kanady',
          'i Mexika. Poprvé se jede v Rusku a Ázerbajdžánu. Stále nabitější kalendář',
           'se protahuje až do adventu.'],
    '202':['2020-25 | V seriálu mají historicky nejvyšší podíly noční závody a závody na',
          'městských okruzích. Dvě sezony poznamenává pandemie. Od roku 2021',
          'se na třech až šesti Velkých cenách jezdívá i kratší sobotní závod, sprint.']}

legenda = "right"

graf_kalendar = alt.Chart(
    championship_races.with_columns(pl.col("decade").replace_strict(gps_vysvetlivky), allow_object=True).to_pandas(),
    width=330,
    title=alt.Title("75 let MS F1: proměny kalendáře",
                   subtitle=[f"{len(championship_races)} Velkých cen v jediném obrázku."])
).mark_point(filled=True,
             color='#84c0e4'
).encode(
    alt.Y(
        "year:O", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6'), title=None
    ),
    alt.X(
        "monthday:Q", axis=alt.Axis(
            domainOpacity=0, tickColor='#DCDDD6',
            labelAlign='center',    # Align labels to the right
            labelOffset=15,        # Add some spacing between tick and label
            labelPadding=5,
            labelExpr="['I.', 'II.', 'III.', 'IV.', 'V.', 'VI.', 'VII.', 'VIII.', 'IX.', 'X.', 'XI.', 'XII.'][parseInt(datum.value) - 1]"
        ), scale=alt.Scale(domainMax=12.9, domainMin=1), title=None
    ),
    alt.Color(
        'region:N', legend=alt.Legend(orient=legenda, direction="vertical"),
        scale=alt.Scale(range=['#687fa9','#84c0e4','#D74862','#994D56','#F2AB74','#789256','#b7d5a9']),
        sort=['záp. Evropa','vých. Evropa','S. Amerika','J. Amerika','Asie','Austrálie','Afrika']
    ),
#    alt.Size('Fastest_lap:Q', legend=alt.Legend(orient=legenda, direction="vertical")),
    alt.Size('nejrychlejší kolo:N', legend=alt.Legend(orient=legenda, direction="vertical"), sort=["< 1:30","1:30-2:00","2:00-5:00","> 5:00"], scale=alt.Scale(rangeMin=80,rangeMax=240)),
    alt.Shape('typ trati:N', scale=alt.Scale(
        range=['circle','diamond','triangle'],
    ), sort=[umele, mestske, silnicni], legend=alt.Legend(orient=legenda, direction="vertical")),
    alt.Stroke('osvětlení:N', 
               scale=alt.Scale(domain=['denní světlo', 'umělé osvětlení'], range=['white', '#a39889']), legend=alt.Legend(orient=legenda, direction="vertical")),
    alt.StrokeWidth('osvětlení:N',
                   scale=alt.Scale(domain=['denní závod', 'umělé osvětlení'], range=[0, 3]), legend=None),
    alt.Row('decade:O', title=None, header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='start', labelOrient='top', labelFont='Asap'))
).resolve_axis(y='independent',x='independent').resolve_scale(y='independent',color='shared').configure_view(
    stroke='transparent'
)

graf_kalendar

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df

In [701]:
graf_kalendar.save("grafy/kalendar.svg")

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df

## Vozy

In [703]:
gps_wiki.sample(10)

raceId,Date,Official name,Distance,Scheduled distance,Course,Weather,Attendance,Laps,Kms,Fastest_lap,Safety_car,Virtual_safety_car,Lap_length,Fastest_lap_speed
f64,str,str,str,str,str,str,str,f64,f64,f64,bool,bool,f64,f64
343.0,"""30 May 2010""","""2010 Formula 1 Turkish Grand P…","""58 laps, 309.396 km (192.250[a…",,"""Permanent racing facility""","""Air: 28 to 29 °C (82 to 84 °F)…","""35,000""",58.0,309.396,89.165,,,,
790.0,"""5 August 1956""","""XVIII Großer Preis von Deutsch…","""22 laps, 501.820 km (311.806 m…",,"""Permanent road course""",,,22.0,501.82,581.6,,,,
82.0,"""24 July 2005""","""Formula 1 Grosser Mobil 1 Prei…","""67 laps, 306.458 km (190.424 m…",,"""Permanent racing facility""","""Cloudy with drizzle, but stayi…",,67.0,306.458,74.873,,,,
785.0,"""13 May 1956""","""XIV Grand Prix Automobile de M…","""100 laps, 314.500 km (195.421 …",,"""Street Circuit""","""Warm, dry, sunny""",,100.0,314.5,104.4,,,,
452.0,"""13 March 1983""","""XII Grande Prêmio do Brasil""","""63 laps, 316.953 km (196.945 m…",,"""Permanent racing facility""","""Dry""",,63.0,316.953,99.829,,,,
944.0,"""15 November 2015 (2015-11-15)""","""Formula 1 Grande PrêmioPetrobr…","""71 laps, 305.909 km (190.083 m…",,"""Permanent racing facility""","""Dry22–28 °C (72–82 °F) air tem…",,71.0,305.909,74.832,,,,
382.0,"""28 August 1988""","""XLVI Champion Belgian Grand Pr…","""43 laps, 298.420 km (185.429 m…",,"""Permanent racing facility""","""Warm, dry and overcast""",,43.0,298.42,120.772,,,,
1099.0,"""19 March 2023""","""Formula 1 STC Saudi Arabian Gr…","""50 laps, 308.450 km (191.662 m…",,"""Street Circuit""","""Clear""","""150,000[3]""",50.0,308.45,91.906,True,,,
402.0,"""1 November 1987""","""XIII Fuji Television Japanese …","""51 laps, 298.860 km (185.703 m…",,"""Permanent racing facility""","""Dry""","""247,000[2]""",51.0,298.86,103.844,,,,
956.0,"""3 July 2016""","""Formula 1 Pirelli Grosser Prei…","""71 laps, 307.146 km (190.564 m…",,"""Permanent racing facility""","""Cloudy""","""85,000 (Weekend)[4]""",71.0,307.146,68.411,True,,,


In [704]:
flaps = gps_wiki.join(
    races.with_columns(pl.col('raceId').cast(float)), how="left", on="raceId"
).group_by('year').agg(
    pl.col('Fastest_lap_speed').median().alias('median_speed'),
    pl.col('Fastest_lap_speed').min().alias('min_speed'),
    pl.col('Fastest_lap_speed').max().alias('max_speed')
).sort(by='year').unpivot(index="year")
flaps


year,variable,value
i64,str,f64
1950,"""median_speed""",
1951,"""median_speed""",
1952,"""median_speed""",
1953,"""median_speed""",
1954,"""median_speed""",
1955,"""median_speed""",
1956,"""median_speed""",
1957,"""median_speed""",
1958,"""median_speed""",
1959,"""median_speed""",


In [705]:
flaps2 = gps_wiki.join(
    races.with_columns(pl.col('raceId').cast(float)), how="left", on="raceId"
).group_by('year').agg(
    pl.col('Fastest_lap_speed').alias('all_speeds')
).sort(by='year').unpivot(index="year").explode('value')
flaps2

year,variable,value
i64,str,f64
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1951,"""all_speeds""",
1951,"""all_speeds""",
1951,"""all_speeds""",


In [706]:
gps_wiki.group_by("Weather").len().sort(by="len",descending=True)

Weather,len
str,u32
"""Sunny""",142
,101
"""Dry""",101
"""Clear""",34
"""Partly cloudy""",31
"""Cloudy""",24
"""Sunny, mild, dry""",12
"""Sunny and hot""",12
"""Dry and sunny""",10
"""Sunny and warm""",10


In [707]:
rain = gps_wiki.filter(pl.col("Weather").str.contains("((?i)rain|(?i)wet)")).select(pl.col("Weather")).to_series().to_list()
rain

['Dry start, with heavy rain and thunderstorm/monsoon later',
 'Rain',
 'Wet, drying later.',
 'Dry at first; light rain in the final stages',
 'Rain, later drying',
 'Cloudy, rain in last 3 laps',
 'Heavy rain, dry towards the end',
 'Rain at beginning and end, otherwise drying',
 'Heavy Rain on Lap 1. Drying track at restart. Light Rain before the finish.',
 'Very heavy rain',
 'Intermittent light rain',
 'Cool and rainy with temperatures reaching up to 20\xa0°C (68\xa0°F)[4]',
 'Cloudy, wet track that progressively dried up.Rainy during the last few laps.',
 'Wet and dry',
 'Dry, light rain at the startAir Temp 34\xa0°C (93\xa0°F)',
 'Wet track at the start, dry later on',
 'Dry/Wet',
 'Rainy: 20\xa0°C (68\xa0°F)',
 'Dry/Wet, Air Temp: 21°C',
 'Rain and Thunderstorms at start, dry later; Air Temp at start: 32\xa0°C (90\xa0°F)',
 'Dry first, rain and thunderstorms later',
 'Dry at first, rain later on, Air: 11\xa0°C (52\xa0°F), Track: 12\xa0°C (54\xa0°F)',
 'Cloudy and raining; Air 1

In [708]:
flaps3 = gps_wiki.filter(~pl.col('Weather').is_in(rain)).join(
    races.with_columns(pl.col('raceId').cast(float)), how="left", on="raceId"
).group_by('year').agg(
    pl.col('Fastest_lap_speed').median().alias('median_speed'),
    pl.col('Fastest_lap_speed').min().alias('min_speed'),
    pl.col('Fastest_lap_speed').max().alias('max_speed')
).sort(by='year').unpivot(index="year")
flaps


year,variable,value
i64,str,f64
1950,"""median_speed""",
1951,"""median_speed""",
1952,"""median_speed""",
1953,"""median_speed""",
1954,"""median_speed""",
1955,"""median_speed""",
1956,"""median_speed""",
1957,"""median_speed""",
1958,"""median_speed""",
1959,"""median_speed""",


In [709]:
from src.alt_friendly import alt_friendly

In [710]:
alt.Chart(alt_friendly(flaps.with_columns(pl.col('year').alias('rok')))).mark_line().encode(
    alt.X('rok:T'),
    alt.Y('value:Q'),
    alt.Color('variable:N')
)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [711]:
alt.Chart(alt_friendly(flaps3.with_columns(pl.col('year').alias('rok')))).mark_line().encode(
    alt.X('rok:T'),
    alt.Y('value:Q'),
    alt.Color('variable:N')
)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [712]:
alt.Chart(alt_friendly(flaps2.with_columns(pl.col('year').alias('rok')))).mark_point(size=1).encode(
    alt.X('rok:T'),
    alt.Y('value:Q'),
    alt.Color('variable:N')
)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [713]:
statuses = df.group_by('status').len().sort(by='len',descending=True).select(pl.col('status')).drop_nulls().to_series().to_list()

In [714]:
', '.join(statuses)

'Finished, +1 Lap, Engine, +2 Laps, Accident, Did not qualify, Collision, Gearbox, Spun off, +3 Laps, Suspension, +4 Laps, Did not prequalify, Transmission, Electrical, Brakes, Withdrew, +5 Laps, Clutch, Not classified, Fuel system, +6 Laps, Disqualified, Turbo, Hydraulics, Overheating, Ignition, Oil leak, Throttle, Retired, Out of fuel, +7 Laps, Halfshaft, Wheel, Oil pressure, Fuel pump, Differential, Collision damage, Tyre, Handling, +8 Laps, Fuel leak, Steering, Radiator, Power Unit, Puncture, +9 Laps, Wheel bearing, Injection, Fuel pressure, Water leak, +10 Laps, Physical, Alternator, Exhaust, Chassis, Mechanical, Magneto, Driveshaft, Axle, Heat shield fire, +11 Laps, Battery, Power loss, Distributor, Oil pump, +13 Laps, Injury, Oil pipe, +14 Laps, +12 Laps, Electronics, Broken wing, Rear wing, Driver unwell, Vibrations, +15 Laps, Water pressure, Wheel nut, 107% Rule, Excluded, Water pump, Injured, +16 Laps, +17 Laps, Supercharger, Technical, Front wing, ERS, +24 Laps, Pneumatics, 

In [715]:
ok = ["Finished"] + [x for x in statuses if "Lap" in x]

In [716]:
dnq = ['Did not qualify','Did not prequalify','107% Rule']

In [717]:
not_ok = [x for x in statuses if x not in ok]

In [718]:
', '.join(not_ok)

'Engine, Accident, Did not qualify, Collision, Gearbox, Spun off, Suspension, Did not prequalify, Transmission, Electrical, Brakes, Withdrew, Clutch, Not classified, Fuel system, Disqualified, Turbo, Hydraulics, Overheating, Ignition, Oil leak, Throttle, Retired, Out of fuel, Halfshaft, Wheel, Oil pressure, Fuel pump, Differential, Collision damage, Tyre, Handling, Fuel leak, Steering, Radiator, Power Unit, Puncture, Wheel bearing, Injection, Fuel pressure, Water leak, Physical, Alternator, Exhaust, Chassis, Mechanical, Magneto, Driveshaft, Axle, Heat shield fire, Battery, Power loss, Distributor, Oil pump, Injury, Oil pipe, Electronics, Broken wing, Rear wing, Driver unwell, Vibrations, Water pressure, Wheel nut, 107% Rule, Excluded, Water pump, Injured, Supercharger, Technical, Front wing, ERS, Pneumatics, Undertray, Fuel, Fire, Water pipe, Spark plugs, Fuel pipe, Wheel rim, Illness, Fatal accident, Fuel rig, Oil line, Safety, Safety concerns, Damage, Track rod, Stalled, Drivetrain, 

In [719]:
qdf = df.filter(~pl.col('status').is_in(dnq))

In [720]:
zavady = ['Engine','Gearbox','Suspension','Transmission','Electrical','Brakes','Clutch','Fuel system','Turbo','Hydraulics','Overheating','Ignition','Oil leak','Throttle','Out of fuel', 'Halfshaft', 'Wheel','Oil pressure', 'Fuel pump', 'Differential',
         'Tyre', 'Handling', 'Fuel leak', 'Steering', 'Radiator', 'Power Unit', 'Puncture', 'Wheel bearing', 'Injection', 'Fuel pressure', 'Water leak', 'Physical', 'Alternator', 'Exhaust', 'Mechanical', 'Chassis', 'Magneto', 'Driveshaft', 'Axle', 'Heat shield fire', 'Battery', 'Power loss', 'Distributor', 'Oil pump',
         'Oil pipe','Electronics', 'Vibrations','Wheel nut','Water pressure','Water pump','ERS', 'Supercharger','Technical', 'Fuel', 'Pneumatics', 'Undertray', 'Water pipe','Fire', 'Spark plugs', 'Fuel pipe', 'Wheel rim','Drivetrain','Oil line', 'Damage', 'Fuel rig', 'Safety belt', 'Driver Seat', 'CV joint',
         'Cooling system','Launch control','Seat', 'Engine fire', 'Refuelling', 'Engine misfire', 'Crankshaft', 'Brake duct']

In [721]:
graf_dokonceni = qdf.group_by('year').len().rename({'len':'started'}).join(
    qdf.filter(pl.col('status').is_in(zavady)).group_by('year').len(), how='left', on='year'
).rename({'len':'porucha'}).drop_nulls().with_columns(pl.col('porucha') / pl.col('started')).sort(by='year').rename({'year':'rok'})
graf_dokonceni

rok,started,porucha
f64,u32,f64
1950.0,160,0.375
1951.0,179,0.418994
1952.0,204,0.348039
1953.0,246,0.402439
1954.0,229,0.384279
1955.0,177,0.378531
1956.0,189,0.391534
1957.0,166,0.355422
1958.0,226,0.367257
1959.0,181,0.359116


In [722]:
graf_dokonceni2 = qdf.filter(pl.col('grid') <= 10).group_by('year').len().rename({'len':'started'}).join(
    qdf.filter(pl.col('grid') <= 10).filter(pl.col('status').is_in(zavady)).group_by('year').len(), how='left', on='year'
).rename({'len':'porucha'}).drop_nulls().with_columns(pl.col('porucha') / pl.col('started')).sort(by='year').rename({'year':'rok'})
graf_dokonceni2

rok,started,porucha
f64,u32,f64
1950.0,75,0.413333
1951.0,85,0.223529
1952.0,83,0.253012
1953.0,103,0.359223
1954.0,104,0.355769
1955.0,84,0.25
1956.0,94,0.414894
1957.0,85,0.352941
1958.0,113,0.345133
1959.0,91,0.32967


In [723]:
alt.Chart(
    alt_friendly(graf_dokonceni)
).mark_line(
).encode(
    alt.X('rok:T'),
    alt.Y('porucha:Q')
)

In [724]:
alt.Chart(
    alt_friendly(graf_dokonceni2)
).mark_line(
).encode(
    alt.X('rok:T'),
    alt.Y('porucha:Q')
)

df.group_by('status').len().sort(by='len',descending=True)

## Piloti

In [727]:
df.group_by("year").agg(pl.col("driverId").unique().len()).sort(by="year")

year,driverId
f64,u32
,1
1950.0,81
1951.0,84
1952.0,105
1953.0,108
1954.0,97
1955.0,84
1956.0,85
1957.0,76
1958.0,87


In [728]:
alt.Chart(alt_friendly(df.group_by("year").agg(pl.col("driverId").unique().len()).sort(by="year").rename({"year":"rok"}))).mark_bar(
).encode(
    alt.X("rok:T"),
    alt.Y("driverId:Q")
)

In [729]:
alt.Chart(df.with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    ).group_by("decade").agg(pl.col("driverId").unique().len()).sort(by="decade")).mark_point(
).encode(
    alt.Y("decade:N"),
    alt.X("driverId:Q")
)

In [730]:
df.with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    ).group_by("decade").agg(pl.col("driverId").unique().len()).sort(by="decade")

decade,driverId
str,u32
,1
"""195""",332
"""196""",219
"""197""",173
"""198""",115
"""199""",105
"""200""",71
"""201""",66
"""202""",36


In [731]:
df.group_by('driverId').agg(pl.col('year').min()).group_by('year').agg(pl.col('driverId').unique().len()).sort(by='year').with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    ).group_by("decade").agg(pl.col("driverId").sum()).sort(by="decade")

decade,driverId
str,u32
,1
"""195""",332
"""196""",145
"""197""",135
"""198""",79
"""199""",66
"""200""",49
"""201""",41
"""202""",14


Co ukázat:
- kontinent (barva)
- počet závodů (průhlednost)
- mrtví (kroužek)
- ženy (tvar)

In [733]:
df.group_by("driverId").agg(pl.col("year").min()).drop_nulls().with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    )

driverId,year,decade
f64,f64,str
254.0,1975.0,"""197"""
285.0,1976.0,"""197"""
544.0,1960.0,"""196"""
794.0,1950.0,"""195"""
64.0,1995.0,"""199"""
469.0,1962.0,"""196"""
739.0,1952.0,"""195"""
173.0,1979.0,"""197"""
87.0,1991.0,"""199"""
149.0,1990.0,"""199"""


In [734]:
kvalifikovani = df.filter(~pl.col("status").is_in(dnq)).select(pl.col("name")).to_series().to_list()

In [735]:
df.filter(~pl.col("name").is_in(kvalifikovani)).filter(pl.col("status").is_in(dnq)).group_by("name").len().sort(by="len",descending=True)

name,len
str,u32
"""Claudio Langes""",14
"""Pedro Chaves""",13
"""Dave Kennedy""",7
"""Joachim Winkelhock""",7
"""Enrico Bertaggia""",6
"""Tony Trimmer""",6
"""Michael Bartels""",4
"""Alberto Colombo""",3
"""Giovanna Amati""",3
"""Gianfranco Brancatelli""",3


In [736]:
df.filter(pl.col("year") == 1997).filter(pl.col("gp") == "Australian Grand Prix")

resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,driverRef,code,forename,surname,dob,nationality,driverUrl,year,round,date,quali_date,quali_time,location,country,gp,status,constructor,constructorNationality,constructorRef,name,day,week,monthday,region,type,osvětlení
f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,str,str,str,str,str,f64,f64,str,str,str,str,str,str,str,f64,f64,datetime[ns],str,str,str,str,str,str,str,str,str,str,i16,i8,f64,str,str,str
4005.0,207.0,14.0,1.0,10.0,4.0,1.0,"""1""",1.0,10.0,58.0,"""1:30:28.718""","""5428718""","""\N""","""\N""","""\N""",,1.0,"""coulthard""","""COU""","""David""","""Coulthard""","""1971-03-27T00:00:00.000""","""British""","""http://en.wikipedia.org/wiki/D…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Finished""","""McLaren""","""British""","""mclaren""","""David Coulthard""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""
4007.0,207.0,57.0,1.0,9.0,6.0,3.0,"""3""",3.0,4.0,58.0,"""+22.177""","""5450895""","""\N""","""\N""","""\N""",,1.0,"""hakkinen""","""\N""","""Mika""","""Häkkinen""","""1968-09-28T00:00:00.000""","""Finnish""","""http://en.wikipedia.org/wiki/M…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Finished""","""McLaren""","""British""","""mclaren""","""Mika Häkkinen""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""
4023.0,207.0,35.0,3.0,3.0,1.0,,"""R""",19.0,0.0,0.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,4.0,"""villeneuve""","""VIL""","""Jacques""","""Villeneuve""","""1971-04-09T00:00:00.000""","""Canadian""","""http://en.wikipedia.org/wiki/J…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Collision""","""Williams""","""British""","""williams""","""Jacques Villeneuve""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""
4012.0,207.0,49.0,3.0,4.0,2.0,8.0,"""8""",8.0,0.0,55.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,23.0,"""frentzen""","""\N""","""Heinz-Harald""","""Frentzen""","""1967-05-18T00:00:00.000""","""German""","""http://en.wikipedia.org/wiki/H…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Brakes""","""Williams""","""British""","""williams""","""Heinz-Harald Frentzen""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""
4006.0,207.0,30.0,6.0,5.0,3.0,2.0,"""2""",2.0,6.0,58.0,"""+20.046""","""5448764""","""\N""","""\N""","""\N""",,1.0,"""michael_schumacher""","""MSC""","""Michael""","""Schumacher""","""1969-01-03T00:00:00.000""","""German""","""http://en.wikipedia.org/wiki/M…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Finished""","""Ferrari""","""Italian""","""ferrari""","""Michael Schumacher""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""
4024.0,207.0,56.0,6.0,6.0,5.0,,"""R""",20.0,0.0,0.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,4.0,"""irvine""","""\N""","""Eddie""","""Irvine""","""1965-11-10T00:00:00.000""","""British""","""http://en.wikipedia.org/wiki/E…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Collision""","""Ferrari""","""Italian""","""ferrari""","""Eddie Irvine""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""
4010.0,207.0,78.0,15.0,17.0,13.0,6.0,"""6""",6.0,1.0,58.0,"""+1:36.040""","""5524758""","""\N""","""\N""","""\N""",,1.0,"""larini""","""\N""","""Nicola""","""Larini""","""1964-03-19T00:00:00.000""","""Italian""","""http://en.wikipedia.org/wiki/N…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Finished""","""Sauber""","""Swiss""","""sauber""","""Nicola Larini""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""
4025.0,207.0,65.0,15.0,16.0,7.0,,"""R""",21.0,0.0,0.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,4.0,"""herbert""","""\N""","""Johnny""","""Herbert""","""1964-06-25T00:00:00.000""","""British""","""http://en.wikipedia.org/wiki/J…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Collision""","""Sauber""","""Swiss""","""sauber""","""Johnny Herbert""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""
4022.0,207.0,23.0,17.0,11.0,12.0,,"""R""",18.0,0.0,1.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,6.0,"""ralf_schumacher""","""SCH""","""Ralf""","""Schumacher""","""1975-06-30T00:00:00.000""","""German""","""http://en.wikipedia.org/wiki/R…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Gearbox""","""Jordan""","""Irish""","""jordan""","""Ralf Schumacher""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""
4020.0,207.0,21.0,17.0,12.0,14.0,,"""R""",16.0,0.0,14.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,20.0,"""fisichella""","""FIS""","""Giancarlo""","""Fisichella""","""1973-01-14T00:00:00.000""","""Italian""","""http://en.wikipedia.org/wiki/G…",1997.0,1.0,1997-03-09 00:00:00,"""1997-03-09T00:00:00.000""","""\N""","""Melbourne""","""Australia""","""Australian Grand Prix""","""Spun off""","""Jordan""","""Irish""","""jordan""","""Giancarlo Fisichella""",68,10,3.290323,"""Austrálie""","""stálý okruh""","""denní závod"""


In [737]:
df.group_by(["constructor","year","gp"]).agg(pl.col("driverId").unique().len()).sort(by="driverId",descending=False).filter(pl.col('driverId') >= 9).sort(by="year",descending=True)

constructor,year,gp,driverId
str,f64,str,u32
"""Lotus-Climax""",1962.0,"""British Grand Prix""",9
"""Lotus-Climax""",1961.0,"""United States Grand Prix""",9
"""Lotus-Climax""",1961.0,"""Belgian Grand Prix""",10
"""Lotus-Climax""",1961.0,"""French Grand Prix""",10
"""Lotus-Climax""",1961.0,"""Italian Grand Prix""",10
"""Cooper-Climax""",1961.0,"""German Grand Prix""",11
"""Lotus-Climax""",1961.0,"""British Grand Prix""",11
"""Cooper-Climax""",1960.0,"""British Grand Prix""",9
"""Cooper-Climax""",1959.0,"""Monaco Grand Prix""",9
"""Kurtis Kraft""",1959.0,"""Indianapolis 500""",12


In [738]:
df.filter(pl.col('location') != "Indianapolis").group_by(["constructor","year"]).agg(pl.col("driverId").unique().len()).sort(by="driverId",descending=True)

constructor,year,driverId
str,f64,u32
"""Maserati""",1956.0,26
"""Lotus-Climax""",1962.0,24
"""Lotus-Climax""",1961.0,23
"""Maserati""",1958.0,23
"""Cooper-Climax""",1960.0,22
"""Maserati""",1954.0,20
"""Cooper-Climax""",1959.0,19
"""Maserati""",1955.0,18
"""Cooper-Climax""",1961.0,17
"""Cooper""",1958.0,17


## Pravidla

In [937]:
from datetime import date

In [995]:
sc = championship_races.filter(
    pl.col("Safety_car") == True
).group_by_dynamic(
    index_column="date",every="1y"
).agg(
    pl.col("raceId").unique().len()
).join(
    championship_races.group_by_dynamic(
        index_column="date",every="1y"
    ).agg(
        pl.col("raceId").unique().len()
    ), how="left", on="date"
).with_columns(
    (pl.col("raceId") / pl.col("raceId_right")).alias("podil")
).rename(
    {"date":"zacatek"}
).with_columns(
    pl.col("zacatek").map_elements(lambda x: date(x.year, 12, 31)
).alias("konec")
).with_columns(
    pl.col("zacatek").cast(date)
).with_columns(
    pl.lit("safety car").alias("pravidlo")
).with_columns(
    pl.lit("sport").alias("typ")
).drop(['raceId','raceId_right'])

  ).with_columns(


In [993]:
vsc = championship_races.filter(
    pl.col("Virtual_safety_car") == True
).group_by_dynamic(
    index_column="date",every="1y"
).agg(
    pl.col("raceId").unique().len()
).join(
    championship_races.group_by_dynamic(
        index_column="date",every="1y"
    ).agg(
        pl.col("raceId").unique().len()
    ), how="left", on="date"
).with_columns(
    (pl.col("raceId") / pl.col("raceId_right")).alias("podil")
).rename(
    {"date":"zacatek"}
).with_columns(
    pl.col("zacatek").map_elements(lambda x: date(x.year, 12, 31)
).alias("konec")
).with_columns(
    pl.col("zacatek").cast(date)
).with_columns(
    pl.lit("virtuální safety car").alias("pravidlo")
).with_columns(
    pl.lit("sport").alias("typ")
).drop(['raceId','raceId_right'])


  ).with_columns(


In [1027]:
df.group_by(["year","date","constructor"]).agg(pl.col("driverId").unique().len()).filter(pl.col("driverId") >= 3).sort(by='year',descending=True).head(5)

year,date,constructor,driverId
f64,datetime[ns],str,u32
1985.0,1985-08-04 00:00:00,"""Renault""",3
1984.0,1984-10-21 00:00:00,"""Renault""",3
1983.0,1983-09-25 00:00:00,"""Williams""",3
1982.0,1982-06-13 00:00:00,"""March""",3
1982.0,1982-06-06 00:00:00,"""March""",3


In [1063]:
championship_races.filter(pl.col("Kms") > 500).sort(by="date",descending=True)

raceId,date,year,week,day,monthday,region,type,osvětlení,location,constructors,drivers,Date,Official name,Distance,Scheduled distance,Course,Weather,Attendance,Laps,Kms,Fastest_lap,Safety_car,Virtual_safety_car,Lap_length,Fastest_lap_speed,Course lenght,decade,typ trati,délka okruhu,nejrychlejší kolo
f64,datetime[ns],f64,i8,i16,f64,str,str,str,str,u32,u32,str,str,str,str,str,str,str,f64,f64,f64,bool,bool,f64,f64,f64,str,str,str,str
750.0,1960-06-19 00:00:00,1960.0,24,171,6.633333,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Spa""",5,19,"""19 June 1960""","""XXI Grote Prijs Van Belgie""","""36 laps, 508.32 km (315.864 mi…",,"""Grand Prix Circuit""",,,36.0,508.32,231.9,,,,,14.12,"""196""","""permanentní""","""> 10 km""","""2:00-5:00"""
748.0,1960-05-30 00:00:00,1960.0,22,151,5.967742,"""S. Amerika""","""stálý okruh""","""denní závod""","""Indianapolis""",10,33,"""May 30, 1960 (1960-5-30)""",,"""200 laps, 804.672 km (500.000 …",,"""Permanent racing facility""",,,200.0,804.672,,,,,,4.02336,"""196""","""permanentní""","""< 5 km""","""< 1:30"""
757.0,1959-05-30 00:00:00,1959.0,22,150,5.967742,"""S. Amerika""","""stálý okruh""","""denní závod""","""Indianapolis""",11,33,"""May 30, 1959 (1959-5-30)""",,"""200 laps, 804.672 km (500.000 …",,"""Permanent racing facility""",,,200.0,804.672,,,,,,4.02336,"""195""","""permanentní""","""< 5 km""","""< 1:30"""
768.0,1958-05-30 00:00:00,1958.0,22,150,5.967742,"""S. Amerika""","""stálý okruh""","""denní závod""","""Indianapolis""",7,33,"""May 30, 1958 (1958-5-30)""",,"""200 laps, 804.672 km (500.000 …",,"""Permanent racing facility""",,,200.0,804.672,,,,,,4.02336,"""195""","""permanentní""","""< 5 km""","""< 1:30"""
783.0,1957-09-08 00:00:00,1957.0,36,251,9.266667,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Monza""",3,19,"""8 September 1957""","""XXVIII Gran Premio d'Italia""","""87 laps, 500.25 km (310.84 mil…",,"""Permanent road course""",,,87.0,500.25,103.7,,,,,5.75,"""195""","""permanentní""","""5-10 km""","""1:30-2:00"""
781.0,1957-08-04 00:00:00,1957.0,31,216,8.129032,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Nürburg""",5,24,"""4 August 1957""","""XIX Großer Preis von Deutschla…","""22 laps, 501.820 km (311.806 m…",,"""Permanent road course""",,,22.0,501.82,557.4,,,,,22.81,"""195""","""permanentní""","""> 10 km""","""> 5:00"""
779.0,1957-07-07 00:00:00,1957.0,27,188,7.225806,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Rouen""",5,15,"""7 July 1957 (1957-7-7)""","""XLIII Grand Prix de l'ACF""","""77 laps, 503.734 km (313.005 m…",,"""Permanent racing facility""",,,77.0,503.734,142.4,,,,,6.542,"""195""","""permanentní""","""5-10 km""","""2:00-5:00"""
778.0,1957-05-30 00:00:00,1957.0,22,150,5.967742,"""S. Amerika""","""stálý okruh""","""denní závod""","""Indianapolis""",7,33,"""May 30, 1953 (1953-5-30)""",,"""200 laps, 804.672 km (500.000 …",,"""Permanent racing facility""",,,200.0,804.672,,,,,,4.02336,"""195""","""permanentní""","""< 5 km""","""< 1:30"""
791.0,1956-09-02 00:00:00,1956.0,35,246,9.066667,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Monza""",5,26,"""2 September 1956""","""XXVII Gran Premio d'Italia""","""50 laps, 500.023 km (310.700 m…",,"""Permanent road course""","""Cloudy, warm, alternating ligh…",,50.0,500.023,165.5,,,,,10.00046,"""195""","""permanentní""","""> 10 km""","""2:00-5:00"""
790.0,1956-08-05 00:00:00,1956.0,31,218,8.16129,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Nürburg""",3,22,"""5 August 1956""","""XVIII Großer Preis von Deutsch…","""22 laps, 501.820 km (311.806 m…",,"""Permanent road course""",,,22.0,501.82,581.6,,,,,22.81,"""195""","""permanentní""","""> 10 km""","""> 5:00"""


Ad tankování: https://www.formula1-dictionary.net/refueling.html
Ad sdílení aut: https://forums.autosport.com/topic/48214-the-last-shared-drive-in-a-formula-one-grand-prix/

In [1075]:
doted = date(2024,12,31)

rules = pl.DataFrame(
    [
        {'zacatek':date(2018,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'ochranný rám halo', 'typ':'tech'},
        {'zacatek':date(1957,8,4),'konec':date(1957,8,4), 'podil': float(1), 'pravidlo': 'tankování během závodu', 'typ':'tech'},
        {'zacatek':date(1982,8,15),'konec':date(1983,12,31), 'podil': float(1), 'pravidlo': 'tankování během závodu', 'typ':'tech'},
        {'zacatek':date(1994,1,1),'konec':date(2009,12,31), 'podil': float(1), 'pravidlo': 'tankování během závodu', 'typ':'tech'},
        {'zacatek':date(2014,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'hybridní motory V6', 'typ':'tech'},
        {'zacatek':date(2011,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'DRS', 'typ':'tech'},
        {'zacatek':date(2016,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'povinné střídání směsí gum', 'typ':'tech'},
        {'zacatek':date(1994,5,15),'konec':doted, 'podil': float(1), 'pravidlo': 'rychlostní limit v boxech','typ':'sport'},
        {'zacatek':date(1986,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'nejvýš dva jezdci v týmu','typ':'sport'},
        {'zacatek':date(1950,1,1),'konec':date(1959,12,31), 'podil': float(1), 'pravidlo': 'bod za nejrychlejší kolo','typ':'sport'},
        {'zacatek':date(2019,1,1),'konec':date(2024,12,31), 'podil': float(1), 'pravidlo': 'bod za nejrychlejší kolo','typ':'sport'},
        {'zacatek':date(2008,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'zákaz náhradních vozů','typ':'sport'},
        {'zacatek':date(1950,1,1),'konec':date(1964,10,4), 'podil': float(1), 'pravidlo': 'střídání jezdů během závodu','typ':'sport'}

    ]
)

pravidla = pl.concat(
    [sc, vsc, rules], how='diagonal'
)

razeni = pravidla.group_by('pravidlo').agg(pl.col('zacatek').min()).sort(by='zacatek').select(pl.col('pravidlo')).to_series().to_list()

In [1141]:
alonso = pl.DataFrame([{'datum':date(2001,3,4),'text':'debut Fernanda Alonsa','sloupce':razeni}])

graf_pravidla1 = alt.Chart(pravidla.to_pandas()).mark_bar().encode(
    alt.X("zacatek:T"),
    alt.X2("konec:T"),
    alt.Y("pravidlo:N", sort=razeni),
    alt.Opacity("podil:Q"),
    alt.Color("typ:N")
)

graf_pravidla2 = alt.Chart(alonso.to_pandas()).mark_rule().encode(alt.X('datum:T'))
graf_pravidla3 = alt.Chart(alonso.to_pandas()).mark_text(text='debut Fernanda Alonsa', baseline='middle',).encode(alt.X('datum:T'))

graf_pravidla = graf_pravidla1 + graf_pravidla2 + graf_pravidla3

graf_pravidla

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [744]:
alt.Chart(rules.to_pandas()).mark_bar().encode(
    alt.X("zacatek:T"),
    alt.X2("konec:T"),
    alt.Y("pravidlo:N")
)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
