In [176]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error
import pickle

In [161]:
# Load the per-season rider summary table and preview its first rows.
csv_path = 'RidersSummary.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,bike_number,rider_name,class,season,home_country,motorcycle,team,races_participated,wins,podium,pole,fastest_lap,points,placed,world_championships
0,1,Jorge Martin,Moto3,2015,Spain,Mahindra,Mapfre Team Mahindra,18,0,0,0,0,45,17,0
1,1,Jorge Martin,Moto3,2016,Spain,Mahindra,Pull & Bear Aspar Mahindra Team,16,0,1,0,0,72,16,0
2,1,Jorge Martin,Moto3,2017,Spain,Honda,Del Conca Gresini Moto3,16,1,9,9,2,196,4,0
3,1,Jorge Martin,Moto3,2018,Spain,Honda,Del Conca Gresini Moto3,17,7,10,11,3,260,1,1
4,1,Jorge Martin,Moto2,2019,Spain,KTM,Red Bull KTM Ajo,19,0,2,0,1,94,11,0


In [162]:
# Remove the 'bike_number' column from the frame.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# has already been dropped is a no-op instead of raising KeyError.
df = df.drop(columns=['bike_number'], errors='ignore')

In [163]:
# Preview the first five rows; 'bike_number' should no longer be present.
df.head()

Unnamed: 0,rider_name,class,season,home_country,motorcycle,team,races_participated,wins,podium,pole,fastest_lap,points,placed,world_championships
0,Jorge Martin,Moto3,2015,Spain,Mahindra,Mapfre Team Mahindra,18,0,0,0,0,45,17,0
1,Jorge Martin,Moto3,2016,Spain,Mahindra,Pull & Bear Aspar Mahindra Team,16,0,1,0,0,72,16,0
2,Jorge Martin,Moto3,2017,Spain,Honda,Del Conca Gresini Moto3,16,1,9,9,2,196,4,0
3,Jorge Martin,Moto3,2018,Spain,Honda,Del Conca Gresini Moto3,17,7,10,11,3,260,1,1
4,Jorge Martin,Moto2,2019,Spain,KTM,Red Bull KTM Ajo,19,0,2,0,1,94,11,0


In [164]:
# Count missing values per column.
df.isnull().sum()

rider_name             0
class                  0
season                 0
home_country           0
motorcycle             0
team                   0
races_participated     0
wins                   0
podium                 0
pole                   0
fastest_lap            0
points                 0
placed                 0
world_championships    0
dtype: int64

In [165]:
# (number of rows, number of columns) of the frame.
df.shape

(274, 14)

In [166]:
# Distinct values in the 'class' column.
df['class'].unique()

array(['Moto3', 'Moto2', 'MotoGP', '125cc', 'MotoE'], dtype=object)

In [167]:
# Preview the first five rows of the frame.
df.head()

Unnamed: 0,rider_name,class,season,home_country,motorcycle,team,races_participated,wins,podium,pole,fastest_lap,points,placed,world_championships
0,Jorge Martin,Moto3,2015,Spain,Mahindra,Mapfre Team Mahindra,18,0,0,0,0,45,17,0
1,Jorge Martin,Moto3,2016,Spain,Mahindra,Pull & Bear Aspar Mahindra Team,16,0,1,0,0,72,16,0
2,Jorge Martin,Moto3,2017,Spain,Honda,Del Conca Gresini Moto3,16,1,9,9,2,196,4,0
3,Jorge Martin,Moto3,2018,Spain,Honda,Del Conca Gresini Moto3,17,7,10,11,3,260,1,1
4,Jorge Martin,Moto2,2019,Spain,KTM,Red Bull KTM Ajo,19,0,2,0,1,94,11,0


In [168]:
# Separate the label column from the predictor columns.
target_col = 'world_championships'
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [169]:
# Positional column indices into X (the frame without 'world_championships').
# Numeric: season, races_participated, wins, podium, pole, fastest_lap,
# points, placed.
numeric_idx = [2, 6, 7, 8, 9, 10, 11, 12]
# Categorical: rider_name, class, home_country, motorcycle, team.
categorical_idx = [0, 1, 3, 4, 5]

# Standardise the numeric columns and one-hot encode the categorical ones.
# NOTE(review): with drop='first' and handle_unknown='ignore', a category not
# seen during fit is encoded as all zeros, the same as the dropped first
# category - confirm this ambiguity is acceptable.
encoder = OneHotEncoder(drop='first', handle_unknown='ignore')
step1 = ColumnTransformer(
    transformers=[
        ('tnf1', StandardScaler(), numeric_idx),
        ('tnf2', encoder, categorical_idx),
    ]
)

In [170]:
# Classifier: logistic regression with scikit-learn's default settings.
step22 = LogisticRegression()

In [171]:
# Chain the preprocessing step (step1) and the classifier (step22) into a
# single estimator.
pipe = make_pipeline(step1, step22)

In [172]:
# Fit the whole pipeline (preprocessing + classifier) on the training split.
pipe.fit(X_train, y_train)

In [173]:
# Predict labels for the held-out test split.
y_pred = pipe.predict(X_test)



In [174]:
# Score the held-out predictions.
# NOTE(review): if the labels are 0/1 (as the recorded outputs suggest, since
# they sum to 1), MSE is just the misclassification rate and adds nothing
# beyond accuracy.
test_accuracy = accuracy_score(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)

print(test_accuracy)
print(test_mse)

0.9454545454545454
0.05454545454545454


In [175]:
# Spot-check one prediction: the element at index 3 of the test predictions.
print(y_pred[3])

0


In [177]:
# Persist the fitted pipeline (preprocessing + classifier) to 'bike.pkl'.
# The context manager guarantees the file handle is flushed and closed, even
# if pickling raises part-way through; the bare open() it replaces left the
# handle open.
# Only load this file back from a trusted location: unpickling can execute
# arbitrary code.
with open('bike.pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)

In [179]:
# Preview the first five rows of the frame.
df.head()

Unnamed: 0,rider_name,class,season,home_country,motorcycle,team,races_participated,wins,podium,pole,fastest_lap,points,placed,world_championships
0,Jorge Martin,Moto3,2015,Spain,Mahindra,Mapfre Team Mahindra,18,0,0,0,0,45,17,0
1,Jorge Martin,Moto3,2016,Spain,Mahindra,Pull & Bear Aspar Mahindra Team,16,0,1,0,0,72,16,0
2,Jorge Martin,Moto3,2017,Spain,Honda,Del Conca Gresini Moto3,16,1,9,9,2,196,4,0
3,Jorge Martin,Moto3,2018,Spain,Honda,Del Conca Gresini Moto3,17,7,10,11,3,260,1,1
4,Jorge Martin,Moto2,2019,Spain,KTM,Red Bull KTM Ajo,19,0,2,0,1,94,11,0


In [180]:
# Distinct values in the 'rider_name' column.
df['rider_name'].unique()

array(['Jorge Martin', 'Johann Zarco', 'Luca Marini', 'Maverick Vinales',
       'Fabio Quartararo', 'Franco Morbidelli', 'Enea Bastianini',
       'Raul Fernandez', 'Brad Binder', 'Somkiat Chantra', 'Joan Mir',
       'Pedro Acosta', 'Alex Rins', 'Jack Miller',
       'Fabio Di Giannantonio', 'Fermin Aldeguer', 'Francesco Bagnaia',
       'Marco Bezzecchi', 'Alex Marquez', 'Ai Ogura', 'Miguel Oliveira',
       'Marc Marquez'], dtype=object)

In [181]:
# Distinct values in the 'class' column.
df['class'].unique()

array(['Moto3', 'Moto2', 'MotoGP', '125cc', 'MotoE'], dtype=object)

In [182]:
# Distinct values in the 'season' column (in order of first appearance).
df['season'].unique()

array([2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025,
       2009, 2010, 2011, 2012, 2013, 2014, 2008], dtype=int64)

In [183]:
# Distinct values in the 'home_country' column.
df['home_country'].unique()

array(['Spain', 'France', 'Italy', 'South Africa', 'Thailand',
       'Australia', 'Japan', 'Portugal'], dtype=object)

In [184]:
# Distinct values in the 'motorcycle' column.
df['motorcycle'].unique()

array(['Mahindra', 'Honda', 'KTM', 'Kalex', 'Ducati', 'Aprilia', 'Derbi',
       'Motobi', 'Suter', 'Caterham Suter', 'Yamaha', 'FTR Honda',
       'Aprilia RSA 125', 'FTR M312', 'KTM RC250GP', 'Kalex Moto2',
       'Suzuki GSX-RR', 'Yamaha YZR-M1', 'Aprilia RS-GP', 'KTM RC16',
       'Speed Up', 'Kalex KTM', 'Suter Honda', 'Suzuki', 'Energica',
       'Boscoscuro', 'KTM 125 FRR', 'Derbi RSA 125', 'Suter MMXI',
       'Suter MMX2', 'Honda RC213V', 'Ducati Desmosedici GP23',
       'Ducati Desmosedici GP25'], dtype=object)

In [186]:
# Distinct values in the 'team' column.
df['team'].unique()

array(['Mapfre Team Mahindra', 'Pull & Bear Aspar Mahindra Team',
       'Del Conca Gresini Moto3', 'Red Bull KTM Ajo', 'Pramac Racing',
       'Prima Pramac Racing', 'Aprilia Racing', 'WTR San Marino Team',
       'Avant-AirAsia-Ajo', 'JiR Moto2', 'Came IodaRacing Project',
       'AirAsia Caterham', 'Ajo Motorsport', 'Monster Yamaha Tech3',
       'Red Bull KTM Factory Racing', 'LCR Honda Idemitsu',
       'Avintia Esponsorama Racing', 'Castrol Honda LCR', 'Twelve Racing',
       'Pons Racing Junior Team', 'Forward Racing Team',
       'Sky Racing Team VR46', 'Sky VR46 Avintia',
       'Mooney VR46 Racing Team', 'Repsol Honda Team',
       'Honda HRC Castrol', 'Blusens by Paris Hilton Racing',
       'Blusens Avintia', 'Team Calvo', 'Paginas Amarillas HP 40',
       'Team Suzuki Ecstar', 'Movistar Yamaha MotoGP',
       'Monster Energy Yamaha MotoGP', 'Aprilia Racing Team Gresini',
       'Red Bull KTM Tech3', 'Estrella Galicia 0,0', 'Leopard Racing',
       'Pons HP40', 'Speed Up Ra