In [605]:
import pandas as pd
import polars as pl
import altair as alt
pl.Config(tbl_rows=50)

<polars.config.Config at 0x1d82bbcf8f0>

In [606]:
from src.kristi_promin import kristi_promin
from src.me_to_neurazi import me_to_neurazi
from src.alt_friendly import alt_friendly

alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')

ThemeRegistry.enable('irozhlas')

In [607]:
df = pd.read_json("data/ergast-results.json")
df = pl.from_pandas(df)

In [608]:
races = pd.read_csv("data_raw/ergast/races.csv")
races = pl.from_pandas(races)

In [609]:
circuits = pd.read_csv("data_raw/ergast/circuits.csv")
circuits = pl.from_pandas(circuits)

## Calendar

In [611]:
from calendar import monthrange

In [612]:
def monthday(date_value):
    # Here date_value is a datetime.datetime object, not a Polars column
    month_length = monthrange(date_value.year, date_value.month)[1]
    return date_value.month + (date_value.day / month_length)

In [613]:
df = df.with_columns(pl.col("date").dt.ordinal_day().alias("day"))
df = df.with_columns(pl.col("date").dt.week().alias("week"))
df = df.with_columns(pl.col("date").map_elements(monthday, return_dtype=float).alias("monthday"))

asia = ['UAE','India','Bahrain','Malaysia', 'Qatar', 'China','Japan','Singapore',
             'Saudi Arabia','Korea']
namerica = ['USA','Canada','Mexico','United States']
samerica = ['Brazil', 'Argentina']
weurope = ['Netherlands','Portugal','Germany','UK','France','Belgium','Sweden','Switzerland','Italy','Spain','Monaco','Austria']
eeurope = ['Russia','Hungary','Azerbaijan','Turkey']
oceania = ['Australia']
africa = ['South Africa','Morocco']

street = ['Nevada','Las Vegas','Jeddah','Dallas','Montreal','Detroit','Phoenix','Monte-Carlo','Oporto','Adelaide','Baku','Marina Bay','Valencia','Miami']

nightIds = df.filter(pl.col("location").is_in(['Las Vegas','Jeddah','Marina Bay','Abu Dhabi','Al Daayen']) | ((pl.col('country') == 'Bahrain') & (pl.col('year') >= 2014))).select(pl.col('raceId')).to_series().to_list()

def get_continent(country): ## this is the ugliest thing ever writtern but I'm being sleepy and curious at the same time
    if country in asia:
        return "Asie"
    if country in namerica:
        return "S. Amerika"
    if country in samerica:
        return "J. Amerika"
    if country in weurope:
        return "záp. Evropa"
    if country in eeurope:
        return "vých. Evropa"
    if country in oceania:
        return "Austrálie"
    if country in africa:
        return "Afrika"

def get_type(location):
    if location in street:
        return "dočasný okruh"
    else:
        return "stálý okruh"

def get_light(id):
    if id in nightIds:
        return "umělé osvětlení"
    else:
        return "denní závod"

df = df.with_columns(pl.col('country').map_elements(get_continent, return_dtype=str).alias('region'))
df = df.with_columns(pl.col('location').map_elements(get_type, return_dtype=str).alias('type'))
df = df.with_columns(pl.col('raceId').map_elements(get_light, return_dtype=str).alias('osvětlení'))

In [614]:
def lap_length(course_description: str) -> float: 
    return float(course_description.split(' ')[0].split('[')[0].strip())

In [615]:
gps_wiki = pl.read_parquet('data/gps_wikipedia.parquet')

gps_wiki = gps_wiki.with_columns(pl.col('Course').map_elements(lap_length, return_dtype=float).alias('Lap_length')).with_columns(
    (pl.col('Lap_length') / pl.col('Fastest_lap') * 3600).alias('Fastest_lap_speed')
)

In [616]:
gps_wiki.columns

['raceId',
 'Date',
 'Official name',
 'Distance',
 'Scheduled distance',
 'Course',
 'Weather',
 'Attendance',
 'Laps',
 'Kms',
 'Fastest_lap',
 'Safety_car',
 'Virtual_safety_car',
 'Lap_length',
 'Fastest_lap_speed']

In [617]:
championship_races = df.group_by("raceId").agg(pl.col(["date","year","week","day","monthday","region","type","osvětlení","location"]).min()).join(
    df.group_by("raceId").agg(pl.col('constructor').unique().len().alias('constructors')), how='left', on='raceId').join(
    df.group_by("raceId").agg(pl.col('driverId').unique().len().alias('drivers')), how='left', on='raceId').join(
    gps_wiki, on='raceId', how='left'
    ).with_columns(
    (pl.col("Kms") / pl.col("Laps")).alias("Course lenght")
    ).with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    ).filter(~pl.col('location').is_null()).sort(by="date")

championship_races

umele = "permanentní"
mestske = "městská"
silnicni = "silniční"

def circuit_type(description):
    silnice = ["public","temporary road",'temporary street/road circuit']
    mesto = ["street",'purpose-built temporary circuit','albert park']
    okruh = ['permanent','grand prix circuit','yas marina','race track','korea','former military airbase','reims-gueux','airport facility']
    for x in silnice:
        if x in description.lower():
            return silnicni
    for x in mesto:
        if x in description.lower():
            return mestske
    for x in okruh:
        if x in description.lower():
            return umele
    print(f"Nerozumím: {description}")

championship_races = championship_races.with_columns(pl.col("Course").map_elements(circuit_type,return_dtype=str).alias('typ trati')).with_columns(pl.col('typ trati').fill_null(umele))
championship_races = championship_races.with_columns(
    pl.when(
        pl.col("location") == "Phoenix"
    ).then(
        pl.lit(mestske)
    ).when(
        pl.col('location') == 'Clermont-Ferrand'
    ).then(
        pl.lit(silnicni)
    ).otherwise(
        pl.col("typ trati")
    ).alias("typ trati")
)

def delkova_kategorie(kilometraz):
    if kilometraz <= 5:
        return "< 5 km"
    elif kilometraz <= 10:
        return "5-10 km"
    else:
        return "> 10 km"

championship_races = championship_races.with_columns(pl.col("Course lenght").map_elements(delkova_kategorie, return_dtype=str).alias("délka okruhu"))

def casova_kategorie(nejkolo):
    if nejkolo <= 90:
        return "< 1:30"
    elif nejkolo <= 120:
        return "1:30-2:00"
    elif nejkolo <= 300:
        return "2:00-5:00"
    elif nejkolo > 300:
        return "> 5:00"

championship_races = championship_races.with_columns(pl.col("Fastest_lap").map_elements(casova_kategorie, return_dtype=str).alias("nejrychlejší kolo"))
championship_races = championship_races.with_columns(pl.col("nejrychlejší kolo").fill_null("< 1:30"))

In [618]:
championship_races.filter(pl.col("Course").str.contains("(?i)street")).group_by("location").len().sort(by="len",descending=True)

location,len
str,u32
"""Monte-Carlo""",70
"""Montreal""",39
"""Melbourne""",26
"""Marina Bay""",15
"""Adelaide""",11
"""Baku""",8
"""California""",8
"""Detroit""",7
"""Barcelona""",6
"""Valencia""",5


In [619]:
championship_races.filter(pl.col("Course").str.contains("(?i)permanent")).group_by("location").len().sort(by="len",descending=True)

location,len
str,u32
"""Monza""",74
"""Silverstone""",59
"""Spa""",50
"""São Paulo""",41
"""Nürburg""",41
"""Budapest""",39
"""Spielberg""",38
"""Hockenheim""",37
"""Zandvoort""",34
"""Montmeló""",34


In [620]:
championship_races.filter(pl.col("délka okruhu").is_null())

raceId,date,year,week,day,monthday,region,type,osvětlení,location,constructors,drivers,Date,Official name,Distance,Scheduled distance,Course,Weather,Attendance,Laps,Kms,Fastest_lap,Safety_car,Virtual_safety_car,Lap_length,Fastest_lap_speed,Course lenght,decade,typ trati,délka okruhu,nejrychlejší kolo
f64,datetime[ns],f64,i8,i16,f64,str,str,str,str,u32,u32,str,str,str,str,str,str,str,f64,f64,f64,bool,bool,f64,f64,f64,str,str,str,str
761.0,1959-08-02 00:00:00,1959.0,31,214,8.064516,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Berlin""",5,15,"""2 August 1959""","""XXI Grosser Preis von Deutschl…","""2x30 laps, 498.00 km (309.42 m…",,"""Public road/Permanent racing f…","""Dry and dull.""",,,,124.5,,,,,,"""195""","""silniční""",,"""2:00-5:00"""
594.0,1974-05-12 00:00:00,1974.0,19,132,5.387097,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Brussels""",15,32,"""12 May 1974""",,,,"""Permanent racing facility""",,,,,71.31,,,,,,"""197""","""permanentní""",,"""< 1:30"""
597.0,1974-06-23 00:00:00,1974.0,25,174,6.766667,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Zandvoort""",14,27,"""23 June 1974""","""XXI Grand Prix Zandvoort""",,,"""Permanent racing facility""",,,,,81.44,,,,,,"""197""","""permanentní""",,"""< 1:30"""
601.0,1974-08-18 00:00:00,1974.0,33,230,8.580645,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Spielberg""",15,31,"""18 August 1974""","""XII Memphis Grand Prix von Öst…",,,"""Permanent racing facility""",,,,,97.22,,,,,,"""197""","""permanentní""",,"""1:30-2:00"""
445.0,1984-07-22 00:00:00,1984.0,29,204,7.709677,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Kent""",15,28,"""22 July 1984""","""XXXVII John Player British Gra…","""71 (aggregated: 11 + 60) laps,…","""75 laps, 315.450[4] or 315.457…","""Permanent racing facility""",,,,,73.191,,,,,,"""198""","""permanentní""",,"""< 1:30"""
221.0,1997-09-28 00:00:00,1997.0,39,271,9.933333,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Nürburg""",11,22,"""28 September 1997""","""Grosser Preis von Luxemburg 19…","""67 laps, between 305.233 and 3…",,"""Permanent racing facility""","""Partially cloudy, mild and dry""",,,,78.805,,,,,,"""199""","""permanentní""",,"""< 1:30"""
20.0,2008-04-06 00:00:00,2008.0,14,97,4.2,"""Asie""","""stálý okruh""","""denní závod""","""Sakhir""",11,22,"""6 April 2008""","""2008 Formula 1 Gulf Air Bahrai…","""57[2] laps, 308.238 km (191.53…",,"""Permanent racing facility""","""Dry""","""100,000 (Weekend)[3]""",,,93.193,,,,,,"""200""","""permanentní""",,"""1:30-2:00"""
34.0,2008-10-19 00:00:00,2008.0,42,293,10.612903,"""Asie""","""stálý okruh""","""denní závod""","""Shanghai""",10,20,"""19 October 2008""","""2008 Formula 1 Sinopec Chinese…","""56[2] laps, 305.066 km (189.55…",,"""Permanent Racing Facility""","""Dry""",,,,96.325,,,,,,"""200""","""permanentní""",,"""1:30-2:00"""
1063.0,2021-08-29 00:00:00,2021.0,34,241,8.935484,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Spa""",10,20,"""29 August 2021""","""Formula 1 Rolex Belgian Grand …","""1 lap, 6.880 km (4.275 miles)""","""44 laps, 308.052 km (191.415 m…","""Permanent racing facility""","""Wet; persistent rain""","""213,000[2]""",,,,True,True,,,,"""202""","""permanentní""",,"""< 1:30"""


In [621]:
from src.kristi_promin import kristi_promin
alt.themes.register('irozhlas', kristi_promin)
alt.themes.enable('irozhlas')

ThemeRegistry.enable('irozhlas')

In [789]:
gps_vysvetlivky = {
    '195':['1950-51 | Mistrovství světa začíná jako ryze evropský podnik, soustředěný do',
           'přelomu jara a léta. Oficiálně se do něj řadí i 500 mil Indianapolis, termínově', 
           'ale koliduje s evropskými závody a jezdci přelétají jen výjimečně.'],
    '196':['1960-61 | Pevnou součástí kalendáře se stává lednový závod v Jihoafrické',
           'republice a podzimní Velké ceny ve Spojených státech a Kanadě.',
          'Mistrovství světa začíná naplňovat svůj název.'],
    '197':['1970-79 | Volání jezdců po větší bezpečnosti vyústí v konec posledních',
          'tratí vedených po silnicích: Spa-Francorchamps (1970) a Charade (1972).',
          'Po vážné nehodě Nikiho Laudy končí i Nürburgring (1976), s téměř 23 km',
          'nejdelší okruh šampionátu. Dál už se jezdí na tratích podobných dnešním.'],
    '198':['1980-89 | Závody Formule 1 hledají americké publikum na městských tratích',
          'v Detroitu, Dallasu a Palm Beach. V roce 1986 se poprvé jede ve východním',
          'bloku, na Hungaroringu poblíž Budapešti. O rok později se F1 natrvalo vrací',
          'do Japonska. Kalendář přestává být roztahaný po celém roce, etabluje se',
          'dnešní formát od března do podzimu.'],
    '199':["1990-99 | Klidná dekáda. Velká cena Austrálie se přesouvá z konce sezony",
          "na její začátek a F1 poprvé proniká do kontinentální Asie – do Malajsie."],
    '200':['2000-09 | Přibývá závodů v Asii a na Blízkém východě. V Singapuru se',
          'v roce 2008 poprvé jede v noci pod umělým osvětlením, o rok později seriál',
          'poprvé končí v Abú Zabí, kde se závodí za setmění. V kalendáři se začíná',
          'zřetelně rýsovat srpnová pauza, umožňující lidem okolo F1 strávit při',
          'narůstajícím množství Velkých cen alespoň nějaký čas s rodinami.'],
    '201':['2010-19 | Začátek velkého návratu F1 do Severní Ameriky: do USA, Kanady',
          'i Mexika. Poprvé se jede v Rusku a Ázerbajdžánu. Stále nabitější kalendář',
           'se protahuje až do adventu.'],
    '202':['2020-25 | V seriálu mají historicky nejvyšší podíly noční závody a závody na',
          'městských okruzích. Dvě sezony poznamenává pandemie. Od roku 2021',
          'se na třech až šesti Velkých cenách jezdívá i kratší sobotní závod, sprint.']}

legenda = "right"

graf_kalendar = alt.Chart(
    championship_races.with_columns(pl.col("decade").replace_strict(gps_vysvetlivky), allow_object=True).to_pandas(),
    width=330,
    title=alt.Title("75 let MS F1: proměny kalendáře")
).mark_point(filled=True,
             color='#84c0e4'
).encode(
    alt.Y(
        "year:O", axis=alt.Axis(domainOpacity=0, tickColor='#DCDDD6'), title=None
    ),
    alt.X(
        "monthday:Q", axis=alt.Axis(
            domainOpacity=0, tickColor='#DCDDD6',
            labelAlign='center',    # Align labels to the right
            labelOffset=15,        # Add some spacing between tick and label
            labelPadding=5,
            labelExpr="['I.', 'II.', 'III.', 'IV.', 'V.', 'VI.', 'VII.', 'VIII.', 'IX.', 'X.', 'XI.', 'XII.'][parseInt(datum.value) - 1]"
        ), scale=alt.Scale(domainMax=12.9, domainMin=1), title=None
    ),
    alt.Color(
        'region:N', legend=alt.Legend(orient=legenda, direction="vertical"),
        scale=alt.Scale(range=['#687fa9','#84c0e4','#D74862','#994D56','#F2AB74','#789256','#b7d5a9']),
        sort=['záp. Evropa','vých. Evropa','S. Amerika','J. Amerika','Asie','Austrálie','Afrika']
    ),
#    alt.Size('Fastest_lap:Q', legend=alt.Legend(orient=legenda, direction="vertical")),
    alt.Size('nejrychlejší kolo:N', legend=alt.Legend(orient=legenda, direction="vertical"), sort=["< 1:30","1:30-2:00","2:00-5:00","> 5:00"], scale=alt.Scale(rangeMin=80,rangeMax=240)),
    alt.Shape('typ trati:N', scale=alt.Scale(
        range=['circle','diamond','triangle'],
    ), sort=[umele, mestske, silnicni], legend=alt.Legend(orient=legenda, direction="vertical")),
    alt.Stroke('osvětlení:N', 
               scale=alt.Scale(domain=['denní světlo', 'umělé osvětlení'], range=['white', '#a39889']), legend=alt.Legend(orient=legenda, direction="vertical")),
    alt.StrokeWidth('osvětlení:N',
                   scale=alt.Scale(domain=['denní závod', 'umělé osvětlení'], range=[0, 3]), legend=None),
    alt.Row('decade:O', title=None, header=alt.Header(labelAngle=0, labelAlign='left', labelAnchor='start', labelOrient='top', labelFont='Asap'))
).resolve_axis(y='independent',x='independent').resolve_scale(y='independent',color='shared').configure_view(
    stroke='transparent'
)

graf_kalendar

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df

In [793]:
kredity = "vizualizace: iROZHLAS.cz"

In [795]:
me_to_neurazi(graf_kalendar, kredity=kredity, soubor="kalendar")

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df

<figure>
    <a href="https://data.irozhlas.cz/grafy/kalendar.svg" target="_blank">
    <img src="https://data.irozhlas.cz/grafy/kalendar.svg" width="100%" alt="Graf s titulkem „75 let MS F1: proměny kalendáře“. Další texty by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


In [791]:
help(me_to_neurazi)

Help on function me_to_neurazi in module src.me_to_neurazi:

me_to_neurazi(graf: altair.vegalite.v5.api.LayerChart, kredity: str, soubor: str, slozka='grafy', zvetseni=1.5, slozka_na_serveru='grafy')



## Cars

In [625]:
gps_wiki.sample(10)

raceId,Date,Official name,Distance,Scheduled distance,Course,Weather,Attendance,Laps,Kms,Fastest_lap,Safety_car,Virtual_safety_car,Lap_length,Fastest_lap_speed
f64,str,str,str,str,str,str,str,f64,f64,f64,bool,bool,f64,f64
330.0,"""12 August 1990""",,"""77 laps, 305.536 km (189.851 m…",,"""Permanent racing facility""","""Dry""",,77.0,305.536,82.058,,,,
534.0,"""17 June 1978""","""IX Swedish Grand Prix""","""70 laps, 282.170 km (175.332 m…",,"""Permanent racing facility""","""Sunny and warm""",,70.0,282.17,84.836,,,,
327.0,"""8 July 1990""","""Rhône-Poulenc Grand Prix de Fr…","""80 laps, 305.040 km (189.543 m…",,"""Permanent racing facility""","""Hot, dry, sunny""",,80.0,305.04,68.012,,,,
980.0,"""27 August 2017""","""2017 Formula 1 Pirelli Belgian…","""44 laps, 308.052 km (191.415 m…",,"""Permanent racing facility""","""Partially cloudy and dry""","""265,000[4]""",44.0,308.052,106.577,True,,,
844.0,"""8 May 2011""","""2011 Formula 1 DHL Turkish Gra…","""58 laps, 309.396 km (192.250 m…",,"""Permanent racing facility""","""Sunny, Fine and Dry[2] Air Tem…","""25,000""",58.0,309.396,89.703,,,,
1139.0,"""October 20, 2024""","""Formula 1 Pirelli United State…","""56 laps, 308.405 km (191.634 m…",,"""Permanent racing facility""","""Sunny""",,56.0,308.405,97.33,,,,
1066.0,"""26 September 2021""","""Formula 1 VTB Russian Grand Pr…","""53 laps, 309.745 km (192.467 m…",,"""Permanent racing facility""","""Cloudy and rainy""",,53.0,309.745,97.423,,,,
649.0,"""18 July 1970""","""XXIII RAC British Grand Prix""","""80 laps, 341.200 km (212.012 m…",,"""Permanent racing facility""",,,80.0,341.2,85.9,,,,
90.0,"""7 March 2004""","""2004 Foster's Australian Grand…","""58 laps, 307.574 km (191.118 m…",,"""Temporary street circuit""","""Dry and cloudyAir temperature …","""121,500[1]""",58.0,307.574,84.125,,,,
598.0,"""7 July 1974""",,"""80 laps, 263.12 km (163.495 mi…",,"""Permanent racing facility""",,,80.0,263.12,60.0,,,,


In [626]:
flaps = gps_wiki.join(
    races.with_columns(pl.col('raceId').cast(float)), how="left", on="raceId"
).group_by('year').agg(
    pl.col('Fastest_lap_speed').median().alias('median_speed'),
    pl.col('Fastest_lap_speed').min().alias('min_speed'),
    pl.col('Fastest_lap_speed').max().alias('max_speed')
).sort(by='year').unpivot(index="year")
flaps


year,variable,value
i64,str,f64
1950,"""median_speed""",
1951,"""median_speed""",
1952,"""median_speed""",
1953,"""median_speed""",
1954,"""median_speed""",
1955,"""median_speed""",
1956,"""median_speed""",
1957,"""median_speed""",
1958,"""median_speed""",
1959,"""median_speed""",


In [627]:
flaps2 = gps_wiki.join(
    races.with_columns(pl.col('raceId').cast(float)), how="left", on="raceId"
).group_by('year').agg(
    pl.col('Fastest_lap_speed').alias('all_speeds')
).sort(by='year').unpivot(index="year").explode('value')
flaps2

year,variable,value
i64,str,f64
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1950,"""all_speeds""",
1951,"""all_speeds""",
1951,"""all_speeds""",
1951,"""all_speeds""",


In [629]:
rain = gps_wiki.filter(pl.col("Weather").str.contains("((?i)rain|(?i)wet)")).select(pl.col("Weather")).to_series().to_list()
rain

['Dry start, with heavy rain and thunderstorm/monsoon later',
 'Rain',
 'Wet, drying later.',
 'Dry at first; light rain in the final stages',
 'Rain, later drying',
 'Cloudy, rain in last 3 laps',
 'Heavy rain, dry towards the end',
 'Rain at beginning and end, otherwise drying',
 'Heavy Rain on Lap 1. Drying track at restart. Light Rain before the finish.',
 'Very heavy rain',
 'Intermittent light rain',
 'Cool and rainy with temperatures reaching up to 20\xa0°C (68\xa0°F)[4]',
 'Cloudy, wet track that progressively dried up.Rainy during the last few laps.',
 'Wet and dry',
 'Dry, light rain at the startAir Temp 34\xa0°C (93\xa0°F)',
 'Wet track at the start, dry later on',
 'Dry/Wet',
 'Rainy: 20\xa0°C (68\xa0°F)',
 'Dry/Wet, Air Temp: 21°C',
 'Rain and Thunderstorms at start, dry later; Air Temp at start: 32\xa0°C (90\xa0°F)',
 'Dry first, rain and thunderstorms later',
 'Dry at first, rain later on, Air: 11\xa0°C (52\xa0°F), Track: 12\xa0°C (54\xa0°F)',
 'Cloudy and raining; Air 1

In [630]:
flaps3 = gps_wiki.filter(~pl.col('Weather').is_in(rain)).join(
    races.with_columns(pl.col('raceId').cast(float)), how="left", on="raceId"
).group_by('year').agg(
    pl.col('Fastest_lap_speed').median().alias('median_speed'),
    pl.col('Fastest_lap_speed').min().alias('min_speed'),
    pl.col('Fastest_lap_speed').max().alias('max_speed')
).sort(by='year').unpivot(index="year")
flaps


year,variable,value
i64,str,f64
1950,"""median_speed""",
1951,"""median_speed""",
1952,"""median_speed""",
1953,"""median_speed""",
1954,"""median_speed""",
1955,"""median_speed""",
1956,"""median_speed""",
1957,"""median_speed""",
1958,"""median_speed""",
1959,"""median_speed""",


In [635]:
statuses = df.group_by('status').len().sort(by='len',descending=True).select(pl.col('status')).drop_nulls().to_series().to_list()

In [637]:
ok = ["Finished"] + [x for x in statuses if "Lap" in x]

In [638]:
dnq = ['Did not qualify','Did not prequalify','107% Rule']

In [639]:
not_ok = [x for x in statuses if x not in ok]

In [640]:
', '.join(not_ok)

'Engine, Accident, Did not qualify, Collision, Gearbox, Spun off, Suspension, Did not prequalify, Transmission, Electrical, Brakes, Withdrew, Clutch, Not classified, Fuel system, Disqualified, Turbo, Hydraulics, Overheating, Ignition, Oil leak, Throttle, Retired, Out of fuel, Halfshaft, Wheel, Oil pressure, Fuel pump, Differential, Collision damage, Tyre, Handling, Fuel leak, Steering, Radiator, Power Unit, Puncture, Wheel bearing, Injection, Fuel pressure, Water leak, Physical, Alternator, Exhaust, Chassis, Mechanical, Magneto, Driveshaft, Axle, Heat shield fire, Battery, Power loss, Oil pump, Distributor, Injury, Oil pipe, Broken wing, Electronics, Vibrations, Driver unwell, Rear wing, Water pressure, Wheel nut, 107% Rule, Excluded, Water pump, Injured, ERS, Technical, Supercharger, Front wing, Fuel, Undertray, Pneumatics, Water pipe, Wheel rim, Fire, Fuel pipe, Illness, Spark plugs, Fatal accident, Safety concerns, Stalled, Track rod, Oil line, Damage, Drivetrain, Fuel rig, Safety, 

In [641]:
qdf = df.filter(~pl.col('status').is_in(dnq))

In [642]:
zavady = ['Engine','Gearbox','Suspension','Transmission','Electrical','Brakes','Clutch','Fuel system','Turbo','Hydraulics','Overheating','Ignition','Oil leak','Throttle','Out of fuel', 'Halfshaft', 'Wheel','Oil pressure', 'Fuel pump', 'Differential',
         'Tyre', 'Handling', 'Fuel leak', 'Steering', 'Radiator', 'Power Unit', 'Puncture', 'Wheel bearing', 'Injection', 'Fuel pressure', 'Water leak', 'Physical', 'Alternator', 'Exhaust', 'Mechanical', 'Chassis', 'Magneto', 'Driveshaft', 'Axle', 'Heat shield fire', 'Battery', 'Power loss', 'Distributor', 'Oil pump',
         'Oil pipe','Electronics', 'Vibrations','Wheel nut','Water pressure','Water pump','ERS', 'Supercharger','Technical', 'Fuel', 'Pneumatics', 'Undertray', 'Water pipe','Fire', 'Spark plugs', 'Fuel pipe', 'Wheel rim','Drivetrain','Oil line', 'Damage', 'Fuel rig', 'Safety belt', 'Driver Seat', 'CV joint',
         'Cooling system','Launch control','Seat', 'Engine fire', 'Refuelling', 'Engine misfire', 'Crankshaft', 'Brake duct']

In [643]:
graf_dokonceni = qdf.group_by('year').len().rename({'len':'started'}).join(
    qdf.filter(pl.col('status').is_in(zavady)).group_by('year').len(), how='left', on='year'
).rename({'len':'porucha'}).drop_nulls().with_columns(pl.col('porucha') / pl.col('started')).sort(by='year').rename({'year':'rok'})
graf_dokonceni

rok,started,porucha
f64,u32,f64
1950.0,160,0.375
1951.0,179,0.418994
1952.0,204,0.348039
1953.0,246,0.402439
1954.0,229,0.384279
1955.0,177,0.378531
1956.0,189,0.391534
1957.0,166,0.355422
1958.0,226,0.367257
1959.0,181,0.359116


In [644]:
graf_dokonceni2 = qdf.filter(pl.col('grid') <= 10).group_by('year').len().rename({'len':'started'}).join(
    qdf.filter(pl.col('grid') <= 10).filter(pl.col('status').is_in(zavady)).group_by('year').len(), how='left', on='year'
).rename({'len':'porucha'}).drop_nulls().with_columns(pl.col('porucha') / pl.col('started')).sort(by='year').rename({'year':'rok'})
graf_dokonceni2

rok,started,porucha
f64,u32,f64
1950.0,75,0.413333
1951.0,85,0.223529
1952.0,83,0.253012
1953.0,103,0.359223
1954.0,104,0.355769
1955.0,84,0.25
1956.0,94,0.414894
1957.0,85,0.352941
1958.0,113,0.345133
1959.0,91,0.32967


In [645]:
alt.Chart(
    alt_friendly(graf_dokonceni)
).mark_line(
).encode(
    alt.X('rok:T'),
    alt.Y('porucha:Q')
)

In [884]:
graf_dokonceni2

rok,started,porucha
f64,u32,f64
1950.0,75,0.413333
1951.0,85,0.223529
1952.0,83,0.253012
1953.0,103,0.359223
1954.0,104,0.355769
1955.0,84,0.25
1956.0,94,0.414894
1957.0,85,0.352941
1958.0,113,0.345133
1959.0,91,0.32967


In [882]:
poruchy = alt.Chart(
    alt_friendly(graf_dokonceni2),
    title=alt.Title("Podíl odpadnutí ze závodů MS F1 kvůli technickým závadám"),
    width=350
).mark_line(color='#b45058'
).encode(
    alt.X('rok:T', title=None),
    alt.Y(
        'porucha:Q', 
        title=None, 
        axis=alt.Axis(labelExpr="datum.label * 100 + ' %'", orient='right', domainOpacity=0, tickColor='#DCDDD6')
)
).configure_axisX(
    grid=False
).configure_axisY(
    grid=True
).configure_view(
    stroke='transparent'
)

poruchy

## Pilots

In [650]:
alt.Chart(alt_friendly(df.group_by("year").agg(pl.col("driverId").unique().len()).sort(by="year").rename({"year":"rok"}))).mark_bar(
).encode(
    alt.X("rok:T"),
    alt.Y("driverId:Q")
)

In [651]:
alt.Chart(df.with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    ).group_by("decade").agg(pl.col("driverId").unique().len()).sort(by="decade")).mark_point(
).encode(
    alt.Y("decade:N"),
    alt.X("driverId:Q")
)

In [652]:
df.with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    ).group_by("decade").agg(pl.col("driverId").unique().len()).sort(by="decade")

decade,driverId
str,u32
,1
"""195""",332
"""196""",219
"""197""",173
"""198""",115
"""199""",105
"""200""",71
"""201""",66
"""202""",36


In [653]:
df.group_by('driverId').agg(pl.col('year').min()).group_by('year').agg(pl.col('driverId').unique().len()).sort(by='year').with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    ).group_by("decade").agg(pl.col("driverId").sum()).sort(by="decade")

decade,driverId
str,u32
,1
"""195""",332
"""196""",145
"""197""",135
"""198""",79
"""199""",66
"""200""",49
"""201""",41
"""202""",14


Co ukázat:
- kontinent (barva)
- počet závodů (průhlednost)
- mrtví (kroužek)
- ženy (tvar)

In [655]:
df.group_by("driverId").agg(pl.col("year").min()).drop_nulls().with_columns(
    pl.col("year").map_elements(lambda x: str(x)[0:3], return_dtype=str).alias('decade')
    )

driverId,year,decade
f64,f64,str
219.0,1974.0,"""197"""
184.0,1982.0,"""198"""
447.0,1961.0,"""196"""
805.0,1954.0,"""195"""
110.0,1980.0,"""198"""
643.0,1953.0,"""195"""
307.0,1971.0,"""197"""
25.0,1997.0,"""199"""
555.0,1950.0,"""195"""
761.0,1952.0,"""195"""


In [656]:
kvalifikovani = df.filter(~pl.col("status").is_in(dnq)).select(pl.col("name")).to_series().to_list()

## Pravidla

In [662]:
from datetime import date

In [663]:
sc = championship_races.filter(
    pl.col("Safety_car") == True
).group_by_dynamic(
    index_column="date",every="1y"
).agg(
    pl.col("raceId").unique().len()
).join(
    championship_races.group_by_dynamic(
        index_column="date",every="1y"
    ).agg(
        pl.col("raceId").unique().len()
    ), how="left", on="date"
).with_columns(
    (pl.col("raceId") / pl.col("raceId_right")).alias("podil")
).rename(
    {"date":"zacatek"}
).with_columns(
    pl.col("zacatek").map_elements(lambda x: date(x.year, 12, 31)
).alias("konec")
).with_columns(
    pl.col("zacatek").cast(date)
).with_columns(
    pl.lit("safety car").alias("pravidlo")
).with_columns(
    pl.lit("sport").alias("typ")
).drop(['raceId','raceId_right'])

  ).with_columns(


In [664]:
championship_races.filter(
    pl.col("Virtual_safety_car") == True
)

raceId,date,year,week,day,monthday,region,type,osvětlení,location,constructors,drivers,Date,Official name,Distance,Scheduled distance,Course,Weather,Attendance,Laps,Kms,Fastest_lap,Safety_car,Virtual_safety_car,Lap_length,Fastest_lap_speed,Course lenght,decade,typ trati,délka okruhu,nejrychlejší kolo
f64,datetime[ns],f64,i8,i16,f64,str,str,str,str,u32,u32,str,str,str,str,str,str,str,f64,f64,f64,bool,bool,f64,f64,f64,str,str,str,str
13.0,2009-09-13 00:00:00,2009.0,37,256,9.433333,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Monza""",10,20,"""13 September 2009 (2009-09-13)""","""Formula 1 Gran Premio Santande…","""53 laps, 306.720 km (190.58 mi…",,"""Permanent racing facility""","""Mainly sunny""",,53.0,306.72,84.739,true,true,,,5.78717,"""200""","""permanentní""","""5-10 km""","""< 1:30"""
914.0,2014-10-05 00:00:00,2014.0,40,278,10.16129,"""Asie""","""stálý okruh""","""denní závod""","""Suzuka""",11,22,"""5 October 2014""","""2014 Formula 1 Japanese Grand …","""44 laps, 255.208[n 1] km (158.…","""53 laps, 307.471 km (191.054 m…","""Permanent racing facility""","""Rain. Air: 20 °C (68 °F) Track…","""150,000[5]""",44.0,255.208,111.6,true,true,,,5.800182,"""201""","""permanentní""","""5-10 km""","""1:30-2:00"""
916.0,2014-11-02 00:00:00,2014.0,44,306,11.066667,"""S. Amerika""","""stálý okruh""","""denní závod""","""Austin""",9,18,"""2 November 2014""","""2014 Formula 1 United States G…","""56 laps, 308.405[n 1] km (191.…",,"""Permanent racing facility""","""Sunny""","""107,778""",56.0,308.405,101.379,true,true,,,5.507232,"""201""","""permanentní""","""5-10 km""","""1:30-2:00"""
917.0,2014-11-09 00:00:00,2014.0,45,313,11.3,"""J. Amerika""","""stálý okruh""","""denní závod""","""São Paulo""",9,18,"""9 November 2014 (2014-11-09)""","""Formula 1 Grande Prêmio Petrob…","""71 laps, 305.909[n 1] km (190.…",,"""Permanent racing facility""","""Sunny, Air: 23 to 24 °C (73 to…",,71.0,305.909,73.555,,true,,,4.308577,"""201""","""permanentní""","""< 5 km""","""< 1:30"""
918.0,2014-11-23 00:00:00,2014.0,47,327,11.766667,"""Asie""","""stálý okruh""","""umělé osvětlení""","""Abu Dhabi""",10,20,"""23 November 2014 (2014-11-23)""","""2014 Formula 1 Etihad Airways …","""55 laps, 305.355 km (189.739 m…",,"""Permanent racing facility""","""Clear skies; Air temp: 30 °C (…","""60,000[3][4]""",55.0,305.355,104.496,,true,,,5.551909,"""201""","""permanentní""","""5-10 km""","""1:30-2:00"""
926.0,2015-03-15 00:00:00,2015.0,11,74,3.483871,"""Austrálie""","""stálý okruh""","""denní závod""","""Melbourne""",9,18,"""15 March 2015 (2015-03-15)""","""2015 Formula 1 Rolex Australia…","""58 laps, 307.574 km (191.118 m…",,"""Temporary street circuit""","""Partly cloudy17–18 °C (63–64 °…","""296,000 (Weekend) 101,500 (Rac…",58.0,307.574,90.945,true,true,,,5.303,"""201""","""městská""","""5-10 km""","""1:30-2:00"""
931.0,2015-05-24 00:00:00,2015.0,21,144,5.774194,"""záp. Evropa""","""dočasný okruh""","""denní závod""","""Monte-Carlo""",10,20,"""24 May 2015""","""Formula 1 Grand Prix de Monaco…","""78 laps, 260.286 km (161.734 m…",,"""Street circuit""","""Partly cloudy18–19 °C (64–66 °…","""200,000 (Weekend)""",78.0,260.286,78.063,true,true,,,3.337,"""201""","""městská""","""< 5 km""","""< 1:30"""
934.0,2015-07-05 00:00:00,2015.0,27,186,7.16129,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Silverstone""",10,20,"""5 July 2015""","""2015 Formula 1 British Grand P…","""52 laps, 306.198 km (190.263 m…",,"""Permanent racing facility""","""Cloudy, rain at times17–20 °C …","""350,000 (Weekend) 140,000 (Rac…",52.0,306.198,97.093,true,true,,,5.888423,"""201""","""permanentní""","""5-10 km""","""1:30-2:00"""
936.0,2015-07-26 00:00:00,2015.0,30,207,7.83871,"""vých. Evropa""","""stálý okruh""","""denní závod""","""Budapest""",10,20,"""26 July 2015""","""Formula 1 Pirelli Magyar Nagyd…","""69 laps, 302.249 km (187.809 m…","""70 laps, 306.630 km (190.531 m…","""Permanent racing facility""","""Sunny22–24 °C (72–75 °F) air t…","""186,000 (Weekend) 73,000 (Race…",69.0,302.249,84.821,true,true,,,4.38042,"""201""","""permanentní""","""< 5 km""","""< 1:30"""
937.0,2015-08-23 00:00:00,2015.0,34,235,8.741935,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Spa""",10,20,"""23 August 2015""","""2015 Formula 1 Shell Belgian G…","""43 laps, 301.048 km (187.063 m…","""44 laps, 308.052 km (191.415 m…","""Permanent racing facility""","""Cloudy23 °C (73 °F) air temper…",,43.0,301.048,112.416,,true,,,7.001116,"""201""","""permanentní""","""5-10 km""","""1:30-2:00"""


In [665]:
vsc = championship_races.filter(
    pl.col("Virtual_safety_car") == True
).filter(pl.col('year') >= 2014).group_by_dynamic(
    index_column="date",every="1y"
).agg(
    pl.col("raceId").unique().len()
).join(
    championship_races.group_by_dynamic(
        index_column="date",every="1y"
    ).agg(
        pl.col("raceId").unique().len()
    ), how="left", on="date"
).with_columns(
    (pl.col("raceId") / pl.col("raceId_right")).alias("podil")
).rename(
    {"date":"zacatek"}
).with_columns(
    pl.col("zacatek").map_elements(lambda x: date(x.year, 12, 31)
).alias("konec")
).with_columns(
    pl.col("zacatek").cast(date)
).with_columns(
    pl.lit("virtuální safety car").alias("pravidlo")
).with_columns(
    pl.lit("sport").alias("typ")
).drop(['raceId','raceId_right'])


  ).with_columns(


In [666]:
df.group_by(["year","date","constructor"]).agg(pl.col("driverId").unique().len()).filter(pl.col("driverId") >= 3).sort(by='year',descending=True).head(5)

year,date,constructor,driverId
f64,datetime[ns],str,u32
1985.0,1985-08-04 00:00:00,"""Renault""",3
1984.0,1984-10-21 00:00:00,"""Renault""",3
1983.0,1983-09-25 00:00:00,"""Williams""",3
1982.0,1982-05-09 00:00:00,"""March""",3
1982.0,1982-07-03 00:00:00,"""March""",3


In [667]:
championship_races.filter(pl.col("Kms") > 500).sort(by="date",descending=True)

raceId,date,year,week,day,monthday,region,type,osvětlení,location,constructors,drivers,Date,Official name,Distance,Scheduled distance,Course,Weather,Attendance,Laps,Kms,Fastest_lap,Safety_car,Virtual_safety_car,Lap_length,Fastest_lap_speed,Course lenght,decade,typ trati,délka okruhu,nejrychlejší kolo
f64,datetime[ns],f64,i8,i16,f64,str,str,str,str,u32,u32,str,str,str,str,str,str,str,f64,f64,f64,bool,bool,f64,f64,f64,str,str,str,str
750.0,1960-06-19 00:00:00,1960.0,24,171,6.633333,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Spa""",5,19,"""19 June 1960""","""XXI Grote Prijs Van Belgie""","""36 laps, 508.32 km (315.864 mi…",,"""Grand Prix Circuit""",,,36.0,508.32,231.9,,,,,14.12,"""196""","""permanentní""","""> 10 km""","""2:00-5:00"""
748.0,1960-05-30 00:00:00,1960.0,22,151,5.967742,"""S. Amerika""","""stálý okruh""","""denní závod""","""Indianapolis""",10,33,"""May 30, 1960 (1960-5-30)""",,"""200 laps, 804.672 km (500.000 …",,"""Permanent racing facility""",,,200.0,804.672,,,,,,4.02336,"""196""","""permanentní""","""< 5 km""","""< 1:30"""
757.0,1959-05-30 00:00:00,1959.0,22,150,5.967742,"""S. Amerika""","""stálý okruh""","""denní závod""","""Indianapolis""",11,33,"""May 30, 1959 (1959-5-30)""",,"""200 laps, 804.672 km (500.000 …",,"""Permanent racing facility""",,,200.0,804.672,,,,,,4.02336,"""195""","""permanentní""","""< 5 km""","""< 1:30"""
768.0,1958-05-30 00:00:00,1958.0,22,150,5.967742,"""S. Amerika""","""stálý okruh""","""denní závod""","""Indianapolis""",7,33,"""May 30, 1958 (1958-5-30)""",,"""200 laps, 804.672 km (500.000 …",,"""Permanent racing facility""",,,200.0,804.672,,,,,,4.02336,"""195""","""permanentní""","""< 5 km""","""< 1:30"""
783.0,1957-09-08 00:00:00,1957.0,36,251,9.266667,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Monza""",3,19,"""8 September 1957""","""XXVIII Gran Premio d'Italia""","""87 laps, 500.25 km (310.84 mil…",,"""Permanent road course""",,,87.0,500.25,103.7,,,,,5.75,"""195""","""permanentní""","""5-10 km""","""1:30-2:00"""
781.0,1957-08-04 00:00:00,1957.0,31,216,8.129032,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Nürburg""",5,24,"""4 August 1957""","""XIX Großer Preis von Deutschla…","""22 laps, 501.820 km (311.806 m…",,"""Permanent road course""",,,22.0,501.82,557.4,,,,,22.81,"""195""","""permanentní""","""> 10 km""","""> 5:00"""
779.0,1957-07-07 00:00:00,1957.0,27,188,7.225806,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Rouen""",5,15,"""7 July 1957 (1957-7-7)""","""XLIII Grand Prix de l'ACF""","""77 laps, 503.734 km (313.005 m…",,"""Permanent racing facility""",,,77.0,503.734,142.4,,,,,6.542,"""195""","""permanentní""","""5-10 km""","""2:00-5:00"""
778.0,1957-05-30 00:00:00,1957.0,22,150,5.967742,"""S. Amerika""","""stálý okruh""","""denní závod""","""Indianapolis""",7,33,"""May 30, 1953 (1953-5-30)""",,"""200 laps, 804.672 km (500.000 …",,"""Permanent racing facility""",,,200.0,804.672,,,,,,4.02336,"""195""","""permanentní""","""< 5 km""","""< 1:30"""
791.0,1956-09-02 00:00:00,1956.0,35,246,9.066667,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Monza""",5,26,"""2 September 1956""","""XXVII Gran Premio d'Italia""","""50 laps, 500.023 km (310.700 m…",,"""Permanent road course""","""Cloudy, warm, alternating ligh…",,50.0,500.023,165.5,,,,,10.00046,"""195""","""permanentní""","""> 10 km""","""2:00-5:00"""
790.0,1956-08-05 00:00:00,1956.0,31,218,8.16129,"""záp. Evropa""","""stálý okruh""","""denní závod""","""Nürburg""",3,22,"""5 August 1956""","""XVIII Großer Preis von Deutsch…","""22 laps, 501.820 km (311.806 m…",,"""Permanent road course""",,,22.0,501.82,581.6,,,,,22.81,"""195""","""permanentní""","""> 10 km""","""> 5:00"""


Ad tankování: https://www.formula1-dictionary.net/refueling.html
Ad sdílení aut: https://forums.autosport.com/topic/48214-the-last-shared-drive-in-a-formula-one-grand-prix/

In [669]:
vsc

zacatek,podil,konec,pravidlo,typ
date,f64,date,str,str
2014-01-01,0.210526,2014-12-31,"""virtuální safety car""","""sport"""
2015-01-01,0.368421,2015-12-31,"""virtuální safety car""","""sport"""
2016-01-01,0.238095,2016-12-31,"""virtuální safety car""","""sport"""
2017-01-01,0.15,2017-12-31,"""virtuální safety car""","""sport"""
2018-01-01,0.380952,2018-12-31,"""virtuální safety car""","""sport"""
2019-01-01,0.285714,2019-12-31,"""virtuální safety car""","""sport"""
2020-01-01,0.352941,2020-12-31,"""virtuální safety car""","""sport"""
2021-01-01,0.227273,2021-12-31,"""virtuální safety car""","""sport"""
2022-01-01,0.590909,2022-12-31,"""virtuální safety car""","""sport"""
2023-01-01,0.454545,2023-12-31,"""virtuální safety car""","""sport"""


In [670]:
doted = date(2025,12,31)
bodovani = 'počet bodovaných míst'

rules = pl.DataFrame(
    [
        {'zacatek':date(2018,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'ochranný rám halo', 'typ':'tech'},
        {'zacatek':date(2018,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'ochranný rám halo', 'typ':'tech'},
#        {'zacatek':date(1957,8,4),'konec':date(1957,8,4), 'podil': float(1), 'pravidlo': 'tankování během závodu', 'typ':'tech'},
        {'zacatek':date(1982,8,15),'konec':date(1983,12,31), 'podil': float(0.7), 'pravidlo': 'tankování během závodu', 'typ':'tech'},
        {'zacatek':date(1994,1,1),'konec':date(2009,12,31), 'podil': float(1), 'pravidlo': 'tankování během závodu', 'typ':'tech'},
        {'zacatek':date(2014,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'hybridní motory V6', 'typ':'tech'},
        {'zacatek':date(2011,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'DRS usnadňující předjetí', 'typ':'tech'},
        {'zacatek':date(2016,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'povinné střídání směsí gum', 'typ':'tech'},
      #  {'zacatek':date(1994,5,15),'konec':doted, 'podil': float(1), 'pravidlo': 'rychlostní limit v boxech','typ':'sport'},
      #  {'zacatek':date(1986,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'maximálně dva jezdci v týmu','typ':'sport'},
        {'zacatek':date(1950,1,1),'konec':date(1959,12,31), 'podil': float(1), 'pravidlo': 'bod za nejrychlejší kolo','typ':'sport'},
        {'zacatek':date(2019,1,1),'konec':date(2024,12,31), 'podil': float(1), 'pravidlo': 'bod za nejrychlejší kolo','typ':'sport'},
        {'zacatek':date(2008,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'zákaz náhradních vozů','typ':'tech'},
        {'zacatek':date(1950,1,1),'konec':date(1964,10,4), 'podil': float(1), 'pravidlo': 'střídání jezdů během závodu','typ':'sport'},
        {'zacatek':date(2006,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'týmová rádia v TV přenosech','typ':'show'},
        {'zacatek':date(1969,6,29),'konec':doted, 'podil': float(1), 'pravidlo': 'stříkání šampaňským','typ':'show'},
        {'zacatek':date(1950,1,1),'konec':date(1986,9,7), 'podil': float(1), 'pravidlo': 'vavřínínový věnec pro vítěze','typ':'show'},
        # {'zacatek':date(1971,1,1),'konec':date(1971,12,31), 'podil': float(0.7), 'pravidlo': 'zpomalovací vůz / safety car','typ':'sport'},
        {'zacatek':date(1992,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'safety car (zpomalovací vůz)','typ':'sport'},
        {'zacatek':date(2014,11,1),'konec':doted, 'podil': float(1), 'pravidlo': 'virtuální safety car','typ':'sport'},
        {'zacatek':date(1985,8,4),'konec':doted, 'podil': float(1), 'pravidlo': 'záběry z vozů v TV přenosech','typ':'show'},
        {'zacatek':date(2016,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'divácká anketa o jezdce dne','typ':'show'},
        # {'zacatek':date(1958,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'Pohár konstruktérů','typ':'sport'},
        {'zacatek':date(1953,7,18),'konec':date(1994,12,31), 'podil': float(0.7), 'pravidlo': 'televizní přenosy','typ':'show','text':'nepravidelné/nekompletní'},
        {'zacatek':date(1995,1,1),'konec':doted, 'podil': float(1), 'pravidlo': 'televizní přenosy','typ':'show','text':'kompletní víkendy'},
        {'zacatek':date(1950,1,1),'konec':date(1959,12,31), 'podil':float(0.5), 'pravidlo': bodovani, 'typ':'sport', 'text':"5"},
        {'zacatek':date(1960,1,1),'konec':date(2002,12,31), 'podil':float(0.66), 'pravidlo': bodovani, 'typ':'sport', 'text':"6"},
        {'zacatek':date(2003,1,1),'konec':date(2009,12,31), 'podil':float(0.833), 'pravidlo': bodovani, 'typ':'sport', 'text':"8"},
        {'zacatek':date(2010,1,1),'konec':doted, 'podil':float(1), 'pravidlo': bodovani, 'typ':'sport','text':'10'},
        {'zacatek':date(1963,1,1),'konec':doted, 'podil':float(1), 'pravidlo': 'povinné helmy', 'typ':'sport'},
        {'zacatek':date(1963,1,1),'konec':doted, 'podil':float(1), 'pravidlo': 'barevné signalizační vlajky', 'typ':'sport'},
        {'zacatek':date(2019,3,8),'konec':doted, 'podil':float(1), 'pravidlo': 'seriál Touha po vítězství', 'typ':'show'},
        {'zacatek':date(2021,7,17),'konec':doted, 'podil':float(1), 'pravidlo': 'sobotní sprinty', 'typ':'sport'},
        {'zacatek':date(2021,1,1),'konec':doted, 'podil':float(1), 'pravidlo': 'rozpočtový strop pro týmy', 'typ':'sport'},
        {'zacatek':date(1968,1,1),'konec':date(2006,12,31), 'podil':float(1), 'pravidlo': 'reklamy na cigarety', 'typ':'show'},
        {'zacatek':date(1968,1,1),'konec':date(2017,12,31), 'podil':float(1), 'pravidlo': 'grid girls (hostesky na startu)', 'typ':'show', 'text':'nejasný začátek'},
    ]
)

#pravidla = pl.concat(
#    [sc, vsc, rules], how='diagonal'
#)

pravidla = rules.with_columns(
    pl.when(pl.col("text").is_null()).then(pl.lit("")).otherwise(pl.lit("(") + pl.col("text") + pl.lit(")")).alias("text")
).with_columns(
    pl.col("zacatek").dt.year().alias("prvni_rok")
).with_columns(
    pl.col("konec").dt.year().alias("posledni_rok")
).with_columns(
    pl.when(pl.col("prvni_rok").cast(str).str.head(2) == pl.col("posledni_rok").cast(str).str.head(2)).then(2).otherwise(4).alias("kolik_cisel")
).with_columns(
    pl.when(pl.col("prvni_rok") < 2019).then(pl.col("prvni_rok").cast(str)).otherwise(pl.lit("'") + pl.col("prvni_rok").cast(str).str.tail(2))
).with_columns(
    (pl.when(pl.col("posledni_rok") <= 2024).then(pl.lit("-") + pl.col("posledni_rok").cast(str).str.tail(pl.col("kolik_cisel"))).otherwise(pl.lit("")).alias("p_rok"))
).with_columns(
    pl.when(pl.col("pravidlo") != bodovani).then(
    (
        pl.col("prvni_rok") + pl.col("p_rok") + pl.lit(" ") + pl.col("text")).alias("text")).otherwise(pl.col("text")
                                                                                    )
)

razeni = pravidla.group_by('pravidlo').agg(pl.col('zacatek').min(), pl.col('konec').max()).sort(by=['zacatek','konec']).select(pl.col('pravidlo')).to_series().to_list()

razeni.remove(bodovani)

razeni = [bodovani] + razeni

In [800]:
alonso = pl.DataFrame([{'datum':date(2001,3,4),'text':'debut Fernanda Alonsa'}])
liberty = pl.DataFrame([{'datum':date(2017,1,23),'konec':doted,'text':'vstup Liberty Media do F1'}])

graf_pravidla1 = alt.Chart(pravidla.to_pandas(), width=260,
                          title=alt.Title("Proměny pravidel, zvyklostí a show")).mark_bar(opacity=1).encode(
    alt.X("zacatek:T", title=None),
    alt.X2("konec:T", title=None),
    alt.Y("pravidlo:N", title=None, sort=razeni, axis=alt.Axis(gridColor='#ccc',gridWidth=0.5)),
    alt.Opacity("podil:Q", scale=alt.Scale(range=[0.5, 1])),
    alt.Color("typ:N", scale=alt.Scale(range=['#b45058','#687fa9', '#789256']), sort=['sport','tech','show']),

    #    alt.Color("typ:N", scale=alt.Scale(range=['#84c0e4','#F2AB74','#b7d5a9']), sort=['sport','tech','show']),
    alt.Text('text:N')
)

graf_texty = alt.Chart(pravidla.fill_null("").to_pandas()).mark_text(
    align='left',
    dx=2.5,
    baseline='middle',
    fontSize=8.25,
    fontWeight='normal',
    font='Asap', 
    color='white' #1B1417  # You can adjust text color to ensure visibility
).encode(
    alt.X(
        'zacatek:T'
    ),
    alt.X2("konec:T"),
    y=alt.Y('pravidlo:N', sort=razeni),
    text=alt.Text('text:N')
)

alobarva = '#1B1417'

graf_pravidla2 = alt.Chart(
    alonso.to_pandas()).mark_rule(color = alobarva, opacity=0.5
                                 ).encode(alt.X('datum:T'))

graf_pravidla3 = alt.Chart(alonso.to_pandas()).mark_text(
    text=['V roce 2001 debutoval','Fernando Alonso,','nejzkušenější','stále aktivní','pilot F1 →'], 
    color=alobarva, 
    baseline='top',
    align='right', 
    dy=76, 
    dx=-8, 
    font='Asap', 
    fontSize=9
).encode(alt.X('datum:T'))

graf_pravidla4 = alt.Chart(liberty.to_pandas()).mark_rect(
    color='#e3d83b', 
    opacity=0.3  # Added opacity for better visibility
).encode(
    alt.X('datum:T'),
    alt.X2('konec:T'),
)

graf_pravidla5 = alt.Chart(liberty.to_pandas()).mark_text(
    text=['éra','Liberty','Media →'], 
    color='#1B1417', 
    baseline='top',
    align='right', 
    font='Asap', 
    fontSize=9,
    dx=-8,
    dy=195
).encode(alt.X('datum:T'))

# graf_pravidla4 = alt.Chart(liberty.to_pandas()).mark_area(color='red').encode(alt.X('datum:T'),alt.X2('konec:T'))
# graf_pravidla5 = alt.Chart(liberty.to_pandas()).mark_text(text=['vstup Liberty Media'], color=alobarva, baseline='top',align='right', dy=90, dx=-8, font='Asap', fontSize=9).encode(alt.X('datum:T'))


graf_pravidla = alt.layer(
    graf_pravidla4, graf_pravidla2, graf_pravidla1, graf_pravidla3,graf_pravidla5, graf_texty
).configure_axisX(
    grid=False
).configure_axisY(
    grid=True
).configure_view(
    stroke='transparent'
)

graf_pravidla

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [802]:
me_to_neurazi(graf_pravidla, soubor="pravidla", kredity=kredity)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


<figure>
    <a href="https://data.irozhlas.cz/grafy/pravidla.svg" target="_blank">
    <img src="https://data.irozhlas.cz/grafy/pravidla.svg" width="100%" alt="Omlouváme se, ale alternativní text se nepodařilo vygenerovat. Texty v grafu by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


## Pilots 2

In [673]:
drivers_eu = [
    'Russian', 
    'Hungarian', 
    'Czech',
    'Spanish',
    'Polish',
    'Danish',
    'Italian',
    'Finnish',
    'French',
    'British',
    'Swedish',
    'Monegasque',
    'Swiss',
    'Liechtensteiner',
    'Belgian',
    'Dutch',
    'Irish',
    'Portuguese',
    'East German',
    'Austrian', 
    'German']

drivers_na = [
    'Canadian',
    'Mexican',
    'American-Italian',
    'American'
]

drivers_sa = [
    'Chilean',
    'Brazilian',
    'Uruguayan',
    'Argentinian ',
    'Argentinian',
    'Venezuelan',
    'Argentine-Italian',
    'Colombian',
    'Argentine'
]

drivers_o = [
    'New Zealander',
    'Australian'
]

drivers_as = [
    'Malaysian',
    'Chinese',
    'Indonesian',
    'Japanese',
    'Thai',
    'Indian'
]

drivers_af = [
    'South African',
    'Rhodesian'
]

In [820]:
continents = df.group_by("name").agg(pl.col("nationality").first()).with_columns(
    pl.when(
        pl.col('nationality').is_in(drivers_eu)
).then(
        pl.lit('Evropa')
).when(
        pl.col('nationality').is_in(drivers_na)
).then(
        pl.lit('Sev. Amerika')
).when(
        pl.col('nationality').is_in(drivers_sa)
).then(
        pl.lit('J. Amerika')
).when(
        pl.col('nationality').is_in(drivers_as)
).then(
        pl.lit('Asie')
).when(
        pl.col('nationality').is_in(drivers_af)
).then(
        pl.lit('Afrika')
).when(
        pl.col('nationality').is_in(drivers_o)
).then(
        pl.lit('Oceánie')
).alias('kontinent')).select(pl.col(['name','kontinent']))

In [675]:
drivers = df.with_columns(
    (((pl.col("date") - pl.col("dob").str.to_datetime())).cast(int) / (24 * 60 * 60 * 1_000_000 * 365.25)).alias("age")
    ).join(
     debuts, how="left", on="name"
    )

In [676]:
race_count = df.group_by("name").agg(pl.col("raceId").unique().len().alias("raceCount"))

In [677]:
alt.Chart(
    drivers.with_columns(pl.col('age')).filter(pl.col('year').is_between(1950,1969)).group_by('age').len().to_pandas(),
    width=300
).mark_bar().encode(
    alt.X("age:N", scale=alt.Scale(domain=[x for x in range(17,60)])),
    alt.Y("len:Q")
)

In [678]:
alt.Chart(
    drivers.with_columns(pl.col('age').cast(int)).filter(pl.col('year').is_between(2005,2024)).group_by('age').len().to_pandas(),
    width=300
).mark_bar().encode(
    alt.X("age:N", scale=alt.Scale(domain=[x for x in range(17,60)])),
    alt.Y("len:Q")
)

In [679]:
champions = pl.read_csv("data_raw/titles.csv").select(pl.col("champion")).to_series().to_list()

In [680]:
df.sort(by="date").filter(pl.col("raceId") == 834)

resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,driverRef,code,forename,surname,dob,nationality,driverUrl,year,round,date,quali_date,quali_time,location,country,gp,status,constructor,constructorNationality,constructorRef,name,day,week,monthday,region,type,osvětlení
f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,str,str,str,str,str,f64,f64,str,str,str,str,str,str,str,f64,f64,datetime[ns],str,str,str,str,str,str,str,str,str,str,i16,i8,f64,str,str,str
20065.0,834.0,687.0,6.0,28.0,21.0,,"""W""",20.0,0.0,0.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,5.0,"""whitehead""","""\N""","""Peter""","""Whitehead""","""1914-11-12T00:00:00.000""","""British""","""http://en.wikipedia.org/wiki/P…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""Engine""","""Ferrari""","""Italian""","""ferrari""","""Peter Whitehead""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""
20047.0,834.0,647.0,6.0,40.0,7.0,2.0,"""2""",2.0,6.0,99.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,11.0,"""ascari""","""\N""","""Alberto""","""Ascari""","""1918-07-13T00:00:00.000""","""Italian""","""http://en.wikipedia.org/wiki/A…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""+1 Lap""","""Ferrari""","""Italian""","""ferrari""","""Alberto Ascari""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""
20049.0,834.0,793.0,6.0,42.0,9.0,4.0,"""4""",4.0,3.0,97.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,13.0,"""sommer""","""\N""","""Raymond""","""Sommer""","""1906-08-31T00:00:00.000""","""French""","""http://en.wikipedia.org/wiki/R…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""+3 Laps""","""Ferrari""","""Italian""","""ferrari""","""Raymond Sommer""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""
20053.0,834.0,633.0,6.0,38.0,6.0,,"""R""",8.0,0.0,63.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,109.0,"""villoresi""","""\N""","""Luigi""","""Villoresi""","""1909-05-16T00:00:00.000""","""Italian""","""http://en.wikipedia.org/wiki/L…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""Axle""","""Ferrari""","""Italian""","""ferrari""","""Luigi Villoresi""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""
20046.0,834.0,579.0,51.0,34.0,1.0,1.0,"""1""",1.0,9.0,100.0,"""3:13:18.7""","""11598700""","""\N""","""\N""","""\N""",,1.0,"""fangio""","""\N""","""Juan""","""Fangio""","""1911-06-24T00:00:00.000""","""Argentine""","""http://en.wikipedia.org/wiki/J…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""Finished""","""Alfa Romeo""","""Swiss""","""alfa""","""Juan Fangio""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""
20056.0,834.0,642.0,51.0,32.0,2.0,,"""R""",11.0,0.0,0.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,3.0,"""farina""","""\N""","""Nino""","""Farina""","""1906-10-30T00:00:00.000""","""Italian""","""http://en.wikipedia.org/wiki/N…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""Accident""","""Alfa Romeo""","""Swiss""","""alfa""","""Nino Farina""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""
20057.0,834.0,786.0,51.0,36.0,5.0,,"""R""",12.0,0.0,0.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,3.0,"""fagioli""","""\N""","""Luigi""","""Fagioli""","""1898-06-09T00:00:00.000""","""Italian""","""http://en.wikipedia.org/wiki/L…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""Accident""","""Alfa Romeo""","""Swiss""","""alfa""","""Luigi Fagioli""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""
20064.0,834.0,501.0,87.0,8.0,20.0,,"""R""",19.0,0.0,0.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,4.0,"""schell""","""\N""","""Harry""","""Schell""","""1921-06-29T00:00:00.000""","""American""","""http://en.wikipedia.org/wiki/H…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""Collision""","""Cooper""","""British""","""cooper""","""Harry Schell""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""
20055.0,834.0,498.0,105.0,2.0,3.0,,"""R""",10.0,0.0,1.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,3.0,"""gonzalez""","""\N""","""José Froilán""","""González""","""1922-10-05T00:00:00.000""","""Argentine""","""http://en.wikipedia.org/wiki/J…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""Accident""","""Maserati""","""Italian""","""maserati""","""José Froilán González""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""
20060.0,834.0,640.0,105.0,52.0,12.0,,"""R""",15.0,0.0,0.0,"""\N""","""\N""","""\N""","""\N""","""\N""",,3.0,"""graffenried""","""\N""","""Toulo""","""de Graffenried""","""1914-05-18T00:00:00.000""","""Swiss""","""http://en.wikipedia.org/wiki/T…",1950.0,2.0,1950-05-21 00:00:00,"""1950-05-21T00:00:00.000""","""\N""","""Monte-Carlo""","""Monaco""","""Monaco Grand Prix""","""Accident""","""Maserati""","""Italian""","""maserati""","""Toulo de Graffenried""",141,20,5.677419,"""záp. Evropa""","""dočasný okruh""","""denní závod"""


In [681]:
first_seasons_participants = df.filter(
    pl.col('year') == 1950
).select(
    pl.col('name')
).to_series().to_list()

first_debutants = df.filter(
    pl.col('year') > 1950
).filter(
    pl.col("location") != "Indianapolis"
).filter(
    ~pl.col('name').is_in(first_seasons_participants)
).group_by('name').agg(pl.col('date').min()).sort(by='date').head(50).select(pl.col('name')).to_series().to_list()

In [682]:
first_debutants

['Stirling Moss',
 'Peter Hirt',
 'Rudi Fischer',
 'George Abecassis',
 'André Pilette',
 'Aldo Gordini',
 'André Simon',
 'Onofre Marimón',
 'Duncan Hamilton',
 'John James',
 'Philip Fotheringham-Parker',
 'Jacques Swaters',
 'Ken Richardson',
 'Chico Landi',
 'Juan Jover',
 'Georges Grignard',
 'Paco Godia',
 'Ken Wharton',
 'Lance Macklin',
 'Alan Brown',
 'Hans von Stuck',
 'Eric Brandon',
 'Peter Collins',
 'Toni Ulmen',
 'Max de Terra',
 'Jean Behra',
 'Paul Frère',
 "Robert O'Brien",
 'Robin Montgomerie-Charrington',
 'Charles de Tornaco',
 'Arthur Legat',
 'Mike Hawthorn',
 'Roger Laurent',
 'Tony Gaze',
 'Piero Carini',
 'Bill Aston',
 'Ken Downing',
 'Eitel Cantoni',
 'Gino Bianco',
 'Dennis Poore',
 'Eric Thompson',
 'Kenneth McAlpine',
 'Roy Salvadori',
 'Graham Whitehead',
 'Tony Crook',
 'Marcel Balsa',
 'Rudolf Krause',
 'Adolf Brudes',
 'Josef Peters',
 'Ernst Klodwig']

In [683]:
obdobi1 = f"debutanti let {int(debuts.filter(pl.col("name").is_in(first_debutants)).select(pl.col("debut")).min().item())} až {int(debuts.filter(pl.col("name").is_in(first_debutants)).select(pl.col("debut")).max().item())}" 
obdobi1

'debutanti let 1951 až 1952'

In [956]:
last_seasons_participants = df.filter(
    pl.col('year') >= 2023
).select(
    pl.col('name')
).to_series().to_list()

gone_by_2023 = df.filter(~pl.col('name').is_in(last_seasons_participants)).select(pl.col('name')).to_series().to_list()

last_debutants = df.filter(pl.col('name').is_in(gone_by_2023)).group_by('name').agg(pl.col('date').first()).sort(by='date').tail(50).select(pl.col('name')).to_series().to_list()

last_century_drivers = df.filter(pl.col('year') <= 2000).select(pl.col('name')).to_series().to_list()

this_century_debutants = df.filter(~pl.col('name').is_in(last_century_drivers)).group_by('name').agg(pl.col('date').first()).sort(by='date').head(50).select(pl.col('name')).to_series().to_list()

this_century_debutants_all = df.filter(~pl.col('name').is_in(last_century_drivers)).group_by('name').agg(pl.col('date').first()).sort(by='date').select(pl.col('name')).to_series().to_list()

In [685]:
last_debutants

['Vitantonio Liuzzi',
 'Sébastien Bourdais',
 'Adrian Sutil',
 'Timo Glock',
 'Sébastien Buemi',
 'Nico Rosberg',
 'Nick Heidfeld',
 'Heikki Kovalainen',
 'Robert Kubica',
 'Nelson Piquet Jr.',
 'Kazuki Nakajima',
 'Romain Grosjean',
 'Luca Badoer',
 'Kamui Kobayashi',
 'Jenson Button',
 'Rubens Barrichello',
 'Karun Chandhok',
 'Jaime Alguersuari',
 'Vitaly Petrov',
 'Lucas di Grassi',
 "Jérôme d'Ambrosio",
 'Pastor Maldonado',
 'Jean-Éric Vergne',
 'Bruno Senna',
 'Charles Pic',
 'Esteban Gutiérrez',
 'Giedo van der Garde',
 'Max Chilton',
 'Jules Bianchi',
 'Daniil Kvyat',
 'Felipe Massa',
 'André Lotterer',
 'Will Stevens',
 'Felipe Nasr',
 'Marcus Ericsson',
 'Roberto Merhi',
 'Alexander Rossi',
 'Jolyon Palmer',
 'Rio Haryanto',
 'Antonio Giovinazzi',
 'Stoffel Vandoorne',
 'Pascal Wehrlein',
 'Paul di Resta',
 'Sergey Sirotkin',
 'Brendon Hartley',
 'Nicholas Latifi',
 'Pietro Fittipaldi',
 'Jack Aitken',
 'Mick Schumacher',
 'Nikita Mazepin']

In [952]:
drivers.filter(pl.col("name").is_in(first_debutants)).group_by("name").agg(pl.col("age").min()).mean()

name,age
str,f64
,36.731554


In [687]:
first_gp_drivers = df.filter(pl.col("location") != "Indianapolis").filter(pl.col("year") < 1953).select(pl.col("name")).to_series().to_list()
first_gp_drivers

['Piero Taruffi',
 'Rudi Fischer',
 'Nino Farina',
 'Alberto Ascari',
 'Alberto Ascari',
 'Alberto Ascari',
 'Nino Farina',
 'Alberto Ascari',
 'Rudi Fischer',
 'Luigi Villoresi',
 'Nino Farina',
 'Alberto Ascari',
 'Luigi Villoresi',
 'Nino Farina',
 'Alberto Ascari',
 'Piero Taruffi',
 'Luigi Villoresi',
 'Alberto Ascari',
 'José Froilán González',
 'Alberto Ascari',
 'José Froilán González',
 'José Froilán González',
 'Luigi Villoresi',
 'Piero Taruffi',
 'Alberto Ascari',
 'José Froilán González',
 'Alberto Ascari',
 'José Froilán González',
 'Alberto Ascari',
 'Dorino Serafini',
 'Louis Rosier',
 'Piero Taruffi',
 'Luigi Villoresi',
 'Peter Whitehead',
 'Louis Rosier',
 'Piero Carini',
 'Charles de Tornaco',
 'Rudi Fischer',
 'Peter Whitehead',
 'Peter Whitehead',
 'Luigi Villoresi',
 'Alberto Ascari',
 'Clemente Biondetti',
 'Alberto Ascari',
 'Alberto Ascari',
 'Louis Rosier',
 'Piero Taruffi',
 'Chico Landi',
 'Nino Farina',
 'Piero Taruffi',
 'Piero Taruffi',
 'André Simon',
 

In [958]:
drivers.filter(pl.col("name").is_in(this_century_debutants_all)).group_by("name").agg(pl.col("age").min()).mean()

name,age
str,f64
,23.605804


In [968]:
drivers.filter(pl.col("name").is_in(this_century_debutants_all)).group_by("name").agg(pl.col("kontinent").first()).group_by("kontinent").len()

kontinent,len
str,u32
"""Oceánie""",6
"""Jižní Amerika""",12
"""Severní Amerika""",7
"""Evropa""",63
"""Asie""",12


In [978]:
drivers.filter(pl.col("kontinent") == "Afrika").group_by(["name","nationality"]).agg(pl.col("date").max()).sort(by="date")

name,nationality,date
str,str,datetime[ns]
"""Gary Hocking""","""Rhodesian""",1962-12-29 00:00:00
"""Syd van der Vyver""","""South African""",1962-12-29 00:00:00
"""Mike Harris""","""South African""",1962-12-29 00:00:00
"""Bruce Johnstone""","""South African""",1962-12-29 00:00:00
"""Doug Serrurier""","""South African""",1965-01-01 00:00:00
"""Ray Reed""","""South African""",1965-01-01 00:00:00
"""Clive Puzey""","""Rhodesian""",1965-01-01 00:00:00
"""Neville Lederle""","""South African""",1965-01-01 00:00:00
"""Brausch Niemann""","""South African""",1965-01-01 00:00:00
"""David Clapham""","""South African""",1965-01-01 00:00:00


In [689]:
drivers.filter(pl.col("name").is_in(last_debutants)).group_by("name").agg(pl.col("age").min()).median()

name,age
str,f64
,23.315537


In [690]:
obdobi2 = f"debutanti let {int(debuts.filter(pl.col("name").is_in(this_century_debutants)).select(pl.col("debut")).min().item())} až {int(debuts.filter(pl.col("name").is_in(this_century_debutants)).select(pl.col("debut")).max().item())}"
obdobi2

'debutanti let 2001 až 2011'

In [1088]:
debuts = df.group_by("name").agg(pl.col("year").min().alias("debut"))

winners = df.filter(pl.col("position") == 1).select(pl.col("name")).to_series().to_list()
podium_finishers = df.filter(pl.col("position") <= 3).select(pl.col("name")).to_series().to_list()
top6_finishers = df.filter(pl.col("position") <= 6).select(pl.col("name")).to_series().to_list()
top10_finishers = df.filter(pl.col("position") <= 10).select(pl.col("name")).to_series().to_list()
points_finishers = df.filter(pl.col("points") > 0).select(pl.col("name")).to_series().to_list()
finishers = df.filter(~pl.col("position").is_null()).select(pl.col("name")).to_series().to_list()

poradi = [
    "mistrovský titul",
    "vítězství",
    "stupně vítězů",
    "top 6",
    "top 10",
    "dojeli do cíle",
    "nedojeli do cíle"
]

debuts = debuts.with_columns(
    pl.when(
        pl.col("name").is_in(champions)
    ).then(
        pl.lit(poradi[0])
    ).when(
        pl.col("name").is_in(winners)
    ).then(
        pl.lit(poradi[1])
    ).when(
        pl.col("name").is_in(podium_finishers)
    ).then(
        pl.lit(poradi[2])
    ).when(
        pl.col("name").is_in(top6_finishers)
    ).then(
        pl.lit(poradi[3])
    ).when(
        pl.col("name").is_in(top10_finishers)
    ).then(
        pl.lit(poradi[4])
    ).when(
        pl.col('name').is_in(finishers)
    ).then(
        pl.lit(poradi[5])
    ).otherwise(
        pl.lit(poradi[6])
    ).alias(
        "achievement"
)).with_columns(
    pl.when(
        pl.col('name').is_in(first_debutants)
    ).then(
        pl.lit(obdobi1)
    ).when(
        pl.col('name').is_in(this_century_debutants)
    ).then(
        pl.lit(obdobi2)
    ).alias("cohort")
).with_columns(
    pl.struct("cohort","achievement").alias("cohort_achievement")
).with_columns(
    pl.col("cohort_achievement").rank("ordinal").over("cohort_achievement").alias("cohort_rank")
).with_columns(
    pl.col("debut").rank("ordinal").over("debut").alias("year_rank")
).join(
    continents, how="left", on="name"
).join(
    race_count, how="left", on="name"
).with_columns(
    pl.when(
        pl.col("raceCount") < 10
    ).then(
        pl.lit("< 10")
    ).when(
        pl.col("raceCount") < 100
    ).then(
        pl.lit("10-99")
    ).when(
        pl.col("raceCount") >= 10
    ).then(
        pl.lit(">= 100")
    ).alias("celkem závodů")
)

In [692]:
this_century_debutants

['Tomáš Enge',
 'Enrique Bernoldi',
 'Allan McNish',
 'Alex Yoong',
 'Ralph Firman',
 'Justin Wilson',
 'Cristiano da Matta',
 'Zsolt Baumgartner',
 'Nicolas Kiesa',
 'Gianmaria Bruni',
 'Giorgio Pantano',
 'Patrick Friesacher',
 'Antônio Pizzonia',
 'Narain Karthikeyan',
 'Kimi Räikkönen',
 'Juan Pablo Montoya',
 'Mark Webber',
 'Christian Klien',
 'Yuji Ide',
 'Scott Speed',
 'Franck Montagny',
 'Tiago Monteiro',
 'Sakon Yamamoto',
 'Robert Doornbos',
 'Fernando Alonso',
 'Christijan Albers',
 'Anthony Davidson',
 'Takuma Sato',
 'Sebastian Vettel',
 'Markus Winkelhock',
 'Vitantonio Liuzzi',
 'Sébastien Bourdais',
 'Adrian Sutil',
 'Nico Rosberg',
 'Timo Glock',
 'Sébastien Buemi',
 'Lewis Hamilton',
 'Heikki Kovalainen',
 'Robert Kubica',
 'Nelson Piquet Jr.',
 'Kazuki Nakajima',
 'Romain Grosjean',
 'Kamui Kobayashi',
 'Jaime Alguersuari',
 'Karun Chandhok',
 'Nico Hülkenberg',
 'Vitaly Petrov',
 'Lucas di Grassi',
 "Jérôme d'Ambrosio",
 'Pastor Maldonado']

In [838]:
df.filter(pl.col("name").is_in(first_debutants)).group_by("name").agg(pl.col("date").min()).select(pl.col("date")).min()

date
datetime[ns]
1951-05-27 00:00:00


In [840]:
df.filter(pl.col("name").is_in(first_debutants)).group_by("name").agg(pl.col("date").min()).select(pl.col("date")).max()

date
datetime[ns]
1952-08-03 00:00:00


In [848]:
df.filter(pl.col("name").is_in(this_century_debutants)).group_by("name").agg(pl.col("date").min()).select(pl.col("date")).min()

date
datetime[ns]
2001-03-04 00:00:00


In [850]:
df.filter(pl.col("name").is_in(this_century_debutants)).group_by("name").agg(pl.col("date").min()).select(pl.col("date")).max()

date
datetime[ns]
2011-03-27 00:00:00


In [693]:
debuts.filter(pl.col("name") == "Mike Fisher")

name,debut,achievement,cohort,cohort_achievement,cohort_rank,year_rank,kontinent
str,f64,str,str,struct[2],u32,u32,str
"""Mike Fisher""",1967.0,"""aspoň jednou v cíli""",,"{null,""aspoň jednou v cíli""}",88,16,"""Severní Amerika"""


In [1094]:
porovnani_jezdcu = alt.Chart(
    debuts.filter(~pl.col("cohort").is_null()).to_pandas(),
    title=alt.Title("50 jezdců z prvních let MS F1 versus 50 jezdců z 21. století",
                   subtitle=[
                       "Pro reprezentativní výběr debutantů jsme sáhli až do druhé a třetí",
                       "sezony mistrovství světa Formule 1, neboť do té první zasáhlo mnoho ",
                       "závodníků se zkušenostmi z předválečných velkých cen a nešlo o nováčky.",
                       "Proti nim jsme postavili relativně dávné debutanty ze začátku tohoto století,", 
                       "protože mladíci z posledních sezon ještě neměli příležitost se plně projevit."
                   ])
).mark_point(filled=True).encode(
    alt.X("cohort_rank:N", title=None, axis=alt.Axis(labels=False, tickColor='white')),
    alt.Y("achievement", title=None, sort=poradi),
    alt.Column('cohort:N', sort=[obdobi1, obdobi2], title=None, spacing=3, header=alt.Header(labelFont='Asap')), 
    alt.Color(
        "kontinent:N",
        scale=alt.Scale(
            range=['#687fa9','#D74862','#994D56','#F2AB74','#789256','#b7d5a9']
        ),
        sort=[
            'Evropa','Sev. Amerika','J. Amerika','Asie','Austrálie','Afrika'
        ], 
        legend=alt.Legend(
            titleOrient="left", orient="none", legendX=-85, legendY=-70, direction="horizontal")
    ),
    alt.Size(
        "celkem závodů:N", 
        sort=["< 10","10-99","> 100"], 
        scale=alt.Scale(range=[30,300,3000]), 
        legend=alt.Legend(
            titleOrient="left", orient="none", legendX=-85, legendY=-55, direction="horizontal")
    )
    
).configure_view(
    stroke='transparent'
).configure_axis(
    grid=False
).properties(
    width=130,
    height=180
)

porovnani_jezdcu

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [1082]:
me_to_neurazi(porovnani_jezdcu, kredity=kredity, soubor="porovnani_jezdcu")

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


<figure>
    <a href="https://data.irozhlas.cz/grafy/porovnani_jezdcu.svg" target="_blank">
    <img src="https://data.irozhlas.cz/grafy/porovnani_jezdcu.svg" width="100%" alt="Graf s titulkem „50 jezdců z prvních let MS F1 versus 50 jezdců z 21. století“. Další texty by měly být čitelné ze zdrojového souboru SVG." />
    </a>
    </figure>


In [986]:
debuts.filter(pl.col("achievement") == "ani jednou v cíli").sort(by="debut").tail(50)

name,debut,achievement,cohort,cohort_achievement,cohort_rank,year_rank,kontinent,raceCount,závodů
str,f64,str,str,struct[2],u32,u32,str,u32,str
"""Roelof Wunderink""",1975.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",89,13,"""Evropa""",6,"""< 10"""
"""Hiroshi Fushida""",1975.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",139,14,"""Asie""",2,"""< 10"""
"""Emilio Zapico""",1976.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",30,3,"""Evropa""",1,"""< 10"""
"""Otto Stuppacher""",1976.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",53,5,"""Evropa""",3,"""< 10"""
"""Jac Nelleman""",1976.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",56,6,"""Evropa""",1,"""< 10"""
"""Conny Andersson""",1976.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",95,10,"""Evropa""",5,"""< 10"""
"""Boy Lunger""",1976.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",128,12,"""Evropa""",7,"""< 10"""
"""Divina Galica""",1976.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",131,13,"""Evropa""",3,"""< 10"""
"""Andy Sutcliffe""",1977.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",14,2,"""Evropa""",1,"""< 10"""
"""Hans Heyer""",1977.0,"""ani jednou v cíli""",,"{null,""ani jednou v cíli""}",46,4,"""Evropa""",1,"""< 10"""
