In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split

In [7]:
# Load the CSV files
event_details_path = 'ufc_event_details.csv'
fight_results_path = 'ufc_fight_results.csv'
fight_stats_path = 'ufc_fight_stats.csv'

In [8]:
# create dataframes
event_details_df = pd.read_csv(event_details_path)
fight_results_df = pd.read_csv(fight_results_path)
fight_stats_df = pd.read_csv(fight_stats_path)

In [10]:
# convert 'event_date' to datetime format in the event details DataFrame
event_details_df['DATE'] = pd.to_datetime(event_details_df['DATE'])

In [11]:
# define a function to filter events based on a date range
def filter_events_by_date_range(df, start_date, end_date):
    mask = (df['DATE'] >= start_date) & (df['DATE'] <= end_date)
    return df.loc[mask]

In [12]:
# input date range for filtering
start_date = '2020-01-01'
end_date = '2022-12-31'

In [19]:
#filter the events based on the date range
filter_events_df = filter_events_by_date_range(event_details_df, start_date, end_date)
print('events in date range: ', filter_events_df)

events in date range:                                           EVENT  \
62   UFC Fight Night: Cannonier vs. Strickland   
63            UFC 282: Blachowicz vs. Ankalaev   
64       UFC Fight Night: Thompson vs. Holland   
65     UFC Fight Night: Nzechukwu vs. Cutelaba   
66               UFC 281: Adesanya vs. Pereira   
..                                         ...   
183         UFC Fight Night: Felder vs. Hooker   
184   UFC Fight Night: Anderson vs. Blachowicz   
185                   UFC 247: Jones vs. Reyes   
186    UFC Fight Night: Blaydes vs. Dos Santos   
187               UFC 246: McGregor vs. Cowboy   

                                                   URL       DATE  \
62   http://ufcstats.com/event-details/56ec58954158... 2022-12-17   
63   http://ufcstats.com/event-details/f65a0eb902f9... 2022-12-10   
64   http://ufcstats.com/event-details/b23388ff8ac6... 2022-12-03   
65   http://ufcstats.com/event-details/012fc7cd0779... 2022-11-19   
66   http://ufcstats.com/event-

In [21]:
# extract the event names
filtered_event_names = filter_events_df['EVENT'].tolist()
print('filtered_event_names: ', filtered_event_names)

filtered_event_names:  ['UFC Fight Night: Cannonier vs. Strickland', 'UFC 282: Blachowicz vs. Ankalaev', 'UFC Fight Night: Thompson vs. Holland', 'UFC Fight Night: Nzechukwu vs. Cutelaba', 'UFC 281: Adesanya vs. Pereira', 'UFC Fight Night: Rodriguez vs. Lemos', 'UFC Fight Night: Kattar vs. Allen', 'UFC 280: Oliveira vs. Makhachev', 'UFC Fight Night: Grasso vs. Araujo', 'UFC Fight Night: Dern vs. Yan', 'UFC Fight Night: Sandhagen vs. Song', 'UFC 279: Diaz vs. Ferguson', 'UFC Fight Night: Gane vs. Tuivasa', 'UFC 278: Usman vs. Edwards', 'UFC Fight Night: Vera vs. Cruz', 'UFC Fight Night: Santos vs. Hill', 'UFC 277: Pena vs. Nunes 2', 'UFC Fight Night: Blaydes vs. Aspinall', 'UFC Fight Night: Ortega vs. Rodriguez', 'UFC Fight Night: Dos Anjos vs. Fiziev', 'UFC 276: Adesanya vs. Cannonier', 'UFC Fight Night: Tsarukyan vs. Gamrot', 'UFC Fight Night: Kattar vs. Emmett', 'UFC 275: Teixeira vs. Prochazka', 'UFC Fight Night: Volkov vs. Rozenstruik', 'UFC Fight Night: Holm vs. Vieira', 'UFC Figh

In [26]:
# remove space after last word in event name in fight_results.csv
fight_results_df['EVENT'] = fight_results_df['EVENT'].str.strip()

# filter fight results and stats based on the filtered event names
filtered_fight_results_df = fight_results_df[fight_results_df['EVENT'].isin(filtered_event_names)]
filtered_fight_stats_df = fight_stats_df[fight_stats_df['EVENT'].isin(filtered_event_names)]
display(filtered_fight_results_df)
display(filtered_fight_stats_df)

Unnamed: 0,EVENT,BOUT,OUTCOME,WEIGHTCLASS,METHOD,ROUND,TIME,TIME FORMAT,REFEREE,DETAILS,URL
733,UFC Fight Night: Cannonier vs. Strickland,Jared Cannonier vs. Sean Strickland,W/L,Middleweight Bout,Decision - Split,5,5:00,5 Rnd (5-5-5-5-5),Herb Dean,Derek Cleary 46 - 49.Sal D'amato 49 - 46.Junic...,http://ufcstats.com/fight-details/4ea48bf2407b...
734,UFC Fight Night: Cannonier vs. Strickland,Arman Tsarukyan vs. Damir Ismagulov,W/L,Lightweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Mark Smith,Mike Bell 27 - 30.Sal D'amato 27 - 30.Ron McCa...,http://ufcstats.com/fight-details/ce7744d45a69...
735,UFC Fight Night: Cannonier vs. Strickland,Amir Albazi vs. Alessandro Costa,W/L,Flyweight Bout,KO/TKO,3,2:13,3 Rnd (5-5-5),Keith Peterson,Punch to Head At Distance,http://ufcstats.com/fight-details/d95eeba3de7e...
736,UFC Fight Night: Cannonier vs. Strickland,Alex Caceres vs. Julian Erosa,W/L,Featherweight Bout,KO/TKO,1,3:04,3 Rnd (5-5-5),Herb Dean,Kick to Head At Distance,http://ufcstats.com/fight-details/9c1a2ac64c98...
737,UFC Fight Night: Cannonier vs. Strickland,Drew Dober vs. Bobby Green,W/L,Lightweight Bout,KO/TKO,2,2:45,3 Rnd (5-5-5),Keith Peterson,Punch to Head At Distance,http://ufcstats.com/fight-details/c335f44e4f35...
...,...,...,...,...,...,...,...,...,...,...,...
2204,UFC 246: McGregor vs. Cowboy,Andre Fili vs. Sodiq Yusuff,L/W,Featherweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Chris Tognoni,Chris Lee 28 - 29.Derek Cleary 28 - 29.Adalaid...,http://ufcstats.com/fight-details/5868c1547896...
2205,UFC 246: McGregor vs. Cowboy,Tim Elliott vs. Askar Askarov,L/W,Flyweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Keith Peterson,Mike Bell 28 - 29.Sal D'amato 27 - 30.Dave Hag...,http://ufcstats.com/fight-details/ff467a5f06a9...
2206,UFC 246: McGregor vs. Cowboy,Drew Dober vs. Nasrat Haqparast,W/L,Lightweight Bout,KO/TKO,1,1:10,3 Rnd (5-5-5),Mark Smith,Punch to Head At Distance,http://ufcstats.com/fight-details/63dd38f0dcc2...
2207,UFC 246: McGregor vs. Cowboy,Aleksa Camur vs. Justin Ledet,W/L,Light Heavyweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Herb Dean,Derek Cleary 28 - 29.Ron McCarthy 27 - 30.Tony...,http://ufcstats.com/fight-details/6087c50c5f95...


Unnamed: 0,EVENT,BOUT,ROUND,FIGHTER,KD,SIG.STR.,SIG.STR. %,TOTAL STR.,TD,TD %,SUB.ATT,REV.,CTRL,HEAD,BODY,LEG,DISTANCE,CLINCH,GROUND
3596,UFC Fight Night: Cannonier vs. Strickland,Jared Cannonier vs. Sean Strickland,Round 1,Jared Cannonier,0.0,16 of 43,37%,16 of 43,0 of 0,---,0.0,0.0,0:00,3 of 26,7 of 8,6 of 9,16 of 43,0 of 0,0 of 0
3597,UFC Fight Night: Cannonier vs. Strickland,Jared Cannonier vs. Sean Strickland,Round 2,Jared Cannonier,0.0,25 of 58,43%,25 of 58,0 of 0,---,0.0,0.0,0:00,6 of 28,11 of 18,8 of 12,25 of 58,0 of 0,0 of 0
3598,UFC Fight Night: Cannonier vs. Strickland,Jared Cannonier vs. Sean Strickland,Round 3,Jared Cannonier,0.0,33 of 69,47%,33 of 69,0 of 0,---,0.0,0.0,0:00,12 of 40,15 of 21,6 of 8,33 of 69,0 of 0,0 of 0
3599,UFC Fight Night: Cannonier vs. Strickland,Jared Cannonier vs. Sean Strickland,Round 4,Jared Cannonier,0.0,27 of 61,44%,27 of 61,0 of 0,---,0.0,0.0,0:00,14 of 39,11 of 18,2 of 4,27 of 61,0 of 0,0 of 0
3600,UFC Fight Night: Cannonier vs. Strickland,Jared Cannonier vs. Sean Strickland,Round 5,Jared Cannonier,0.0,40 of 79,50%,40 of 79,0 of 0,---,0.0,0.0,0:00,22 of 54,16 of 23,2 of 2,40 of 79,0 of 0,0 of 0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10835,UFC 246: McGregor vs. Cowboy,Sabina Mazo vs. JJ Aldrich,Round 2,Sabina Mazo,0.0,46 of 133,34%,46 of 133,0 of 0,---,0.0,0.0,0:00,32 of 116,7 of 8,7 of 9,42 of 129,4 of 4,0 of 0
10836,UFC 246: McGregor vs. Cowboy,Sabina Mazo vs. JJ Aldrich,Round 3,Sabina Mazo,0.0,43 of 93,46%,46 of 97,0 of 0,---,0.0,0.0,2:01,18 of 64,18 of 20,7 of 9,21 of 67,22 of 26,0 of 0
10837,UFC 246: McGregor vs. Cowboy,Sabina Mazo vs. JJ Aldrich,Round 1,JJ Aldrich,0.0,15 of 47,31%,15 of 47,0 of 0,---,0.0,0.0,0:00,11 of 40,2 of 3,2 of 4,14 of 46,1 of 1,0 of 0
10838,UFC 246: McGregor vs. Cowboy,Sabina Mazo vs. JJ Aldrich,Round 2,JJ Aldrich,0.0,26 of 53,49%,31 of 59,0 of 0,---,0.0,0.0,0:00,20 of 47,5 of 5,1 of 1,24 of 49,2 of 4,0 of 0


In [27]:
# merge the filtered fight results and stats DataFrames on the relevant columns
merged_df = pd.merge(filtered_fight_results_df, filtered_fight_stats_df, on=['EVENT'])
print(merged_df)

                                           EVENT  \
0      UFC Fight Night: Cannonier vs. Strickland   
1      UFC Fight Night: Cannonier vs. Strickland   
2      UFC Fight Night: Cannonier vs. Strickland   
3      UFC Fight Night: Cannonier vs. Strickland   
4      UFC Fight Night: Cannonier vs. Strickland   
...                                          ...   
86229               UFC 246: McGregor vs. Cowboy   
86230               UFC 246: McGregor vs. Cowboy   
86231               UFC 246: McGregor vs. Cowboy   
86232               UFC 246: McGregor vs. Cowboy   
86233               UFC 246: McGregor vs. Cowboy   

                                      BOUT_x OUTCOME             WEIGHTCLASS  \
0      Jared Cannonier  vs. Sean Strickland      W/L       Middleweight Bout   
1      Jared Cannonier  vs. Sean Strickland      W/L       Middleweight Bout   
2      Jared Cannonier  vs. Sean Strickland      W/L       Middleweight Bout   
3      Jared Cannonier  vs. Sean Strickland      W/L   

In [28]:
# Split the merged DataFrame into training, validation, and test sets
train_df, temp_df = train_test_split(merged_df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Display the sizes of the datasets
print(f'Training set size: {len(train_df)}')
print(f'Validation set size: {len(val_df)}')
print(f'Test set size: {len(test_df)}')

Training set size: 60363
Validation set size: 12935
Test set size: 12936


In [29]:
# Save the datasets to new CSV files
train_df.to_csv('ufc_fight_stats_train.csv', index=False)
val_df.to_csv('ufc_fight_stats_val.csv', index=False)
test_df.to_csv('ufc_fight_stats_test.csv', index=False)

# Display a message indicating completion
print('Datasets created and saved successfully.')

Datasets created and saved successfully.
