In [29]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [30]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# CSV files read by the data-loading cell (paths under /content/drive/MyDrive)
# are reachable. Only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [31]:
# Load race data
# All source CSVs live in one Drive folder; keep that location in a single
# place so it only has to be edited once when the data moves.
DATA_DIR = '/content/drive/MyDrive/Team 11A (Winter+Spring)/CS assignment/Final Project'

constructor_data = pd.read_csv(f'{DATA_DIR}/constructors.csv')
driver_data = pd.read_csv(f'{DATA_DIR}/drivers.csv')
results_data = pd.read_csv(f'{DATA_DIR}/results.csv')
pitstop_data = pd.read_csv(f'{DATA_DIR}/pit_stops.csv')
qualifying_data = pd.read_csv(f'{DATA_DIR}/qualifying.csv')
race_data = pd.read_csv(f'{DATA_DIR}/races.csv')

In [33]:


# --- Build the merged results table -----------------------------------------
# Results enriched with driver, constructor and race details, then joined to
# pit stops. The result has one row per (race result, pit stop); results
# without any recorded pit stop keep a single row with NaN pit-stop columns.
results_driver_merged = pd.merge(results_data, driver_data, how='left', on='driverId')
results_constructor_driver_merged = (
    results_driver_merged
    .merge(constructor_data, how='left', on='constructorId')
    .merge(race_data, how='left', on='raceId')
    # Pit stops belong to one driver in one race, so the join key must include
    # driverId. Joining on raceId alone pairs every result with every driver's
    # stops in that race, which turns the per-pair average below into a
    # race-wide average. (Assumes pit_stops.csv has a driverId column.)
    .merge(pitstop_data, how='left', on=['raceId', 'driverId'])
)

# Keep only recent seasons: everything from (latest season - 12) onwards.
earliest_season = results_constructor_driver_merged['year'].max() - 12
results_constructor_driver_merged = results_constructor_driver_merged[
    results_constructor_driver_merged['year'] >= earliest_season
]

# --- Build the driver -> team graph -----------------------------------------
G = nx.DiGraph()

# One node per driver. Nodes are added in order of first appearance;
# de-duplicating first avoids re-adding the same node once per merged row.
driver_nodes = results_constructor_driver_merged.drop_duplicates('driverRef')
for driver_ref, nationality in zip(driver_nodes['driverRef'], driver_nodes['nationality_x']):
    G.add_node(driver_ref, node_type='driver', nationality=nationality)

# One node per team (added after drivers, so a clashing name ends up a team,
# exactly as when teams were added in a second pass).
team_nodes = results_constructor_driver_merged.drop_duplicates('constructorRef')
for team_ref, nationality in zip(team_nodes['constructorRef'], team_nodes['nationality_y']):
    G.add_node(team_ref, node_type='team', nationality=nationality)

# Per (driver, team) pair: mean pit-stop duration ('milliseconds_y' is the
# pit-stop column; 'milliseconds_x' comes from results) and the raceId of the
# last merged row for that pair, which is stored on the edge as 'race_id'.
pair_stats = (
    results_constructor_driver_merged
    .groupby(['driverRef', 'constructorRef'], sort=False)
    .agg(avg_pitstop_ms=('milliseconds_y', 'mean'), race_id=('raceId', 'last'))
)

# Scale the averages linearly onto widths 1..10. pandas min/max skip NaN
# (pairs without any recorded pit stop), unlike the builtin min/max.
avg_pitstop_ms = pair_stats['avg_pitstop_ms']
min_pitstop_time = avg_pitstop_ms.min()
max_pitstop_time = avg_pitstop_ms.max()
pitstop_time_span = max_pitstop_time - min_pitstop_time
if pd.notna(pitstop_time_span) and pitstop_time_span > 0:
    scaled_widths = 1 + 9 * (avg_pitstop_ms - min_pitstop_time) / pitstop_time_span
else:
    # No usable spread (no data, or all averages equal): avoid dividing by 0.
    scaled_widths = pd.Series(1.0, index=avg_pitstop_ms.index)
# Pairs with no pit-stop data get the thinnest line instead of a NaN width.
pair_stats['edge_thickness'] = scaled_widths.fillna(1.0)

# Add one edge per driver-team pair, carrying its width as an attribute so
# that the value can never get out of step with the edge it belongs to.
for pair in pair_stats.itertuples():
    driver_ref, team_ref = pair.Index
    G.add_edge(
        driver_ref,
        team_ref,
        race_id=int(pair.race_id),
        edge_thickness=float(pair.edge_thickness),
    )

# (driver, team) -> average pit stop time in milliseconds (NaN when unknown).
average_pitstop_times = avg_pitstop_ms.to_dict()

# Widths listed in G.edges() order, which is the order the drawing call uses.
edge_thicknesses = [width for _, _, width in G.edges(data='edge_thickness')]

# Define node colors by type: 'skyblue' for drivers, 'lightgreen' for teams
node_colors = ['skyblue' if G.nodes[node]['node_type'] == 'driver' else 'lightgreen' for node in G.nodes()]

# Spring layout; the fixed seed keeps the picture reproducible between runs.
pos = nx.spring_layout(G, k=0.9, iterations=50, seed=42)

# --- Draw --------------------------------------------------------------------
plt.figure(figsize=(40, 40))  # large canvas so labels stay readable
nx.draw_networkx_nodes(G, pos, node_color=node_colors, alpha=0.8, node_size=5000)
nx.draw_networkx_edges(G, pos, edge_color='gray', width=edge_thicknesses, alpha=0.5)

# Labels with a white background box for legibility on top of edges.
nx.draw_networkx_labels(G, pos, font_size=14, font_weight='bold', bbox=dict(facecolor='white', edgecolor='none', alpha=0.8))

plt.title('Driver-Team Network with Edge Thickness Representing Average Pit Stop Time')
plt.axis('off')
plt.show()

# --- Export ------------------------------------------------------------------
# Store each node's name as a 'label' attribute for external tools.
labels = {node: node for node in G.nodes()}
nx.set_node_attributes(G, labels, 'label')
# Save the graph (including 'edge_thickness' on every edge) as GraphML.
nx.write_graphml(G, "network_graph.graphml")



Output hidden; open in https://colab.research.google.com to view.

In [34]:

# Attach driver details to every race result.
results_driver_merged = pd.merge(results_data, driver_data, how='left', on='driverId')
# Attach race details (including the season 'year') to every result.
results_driver_merged = pd.merge(results_driver_merged, race_data, how='left', on='raceId')
# Attach constructor (team) details to every result.
results_driver_merged_constructor = pd.merge(results_driver_merged, constructor_data, how='left', on='constructorId')
# Join qualifying sessions to the enriched results; the inner join keeps only
# driver/race combinations present in both tables.
qualifying_results_merged = pd.merge(qualifying_data, results_driver_merged_constructor, how='inner', on=['driverId', 'raceId'])

# Keep only recent seasons: everything from (latest season - 12) onwards.
earliest_season = qualifying_results_merged['year'].max() - 12
# .copy() makes this an independent frame, so the column assignments done
# later in this cell do not write into a slice of the merged frame
# (which triggers pandas' SettingWithCopyWarning).
qualifying_results_merged = qualifying_results_merged[
    qualifying_results_merged['year'] >= earliest_season
].copy()



import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
def time_to_milliseconds(time_str):
    """Convert a lap-time string to a whole number of milliseconds.

    Accepted forms are ``'M:SS.mmm'`` (e.g. ``'1:26.572'``), the same without
    a minutes part (``'59.123'``), without a fractional part (``'1:26'``), or
    with an extra leading hours field (``'1:02:03.456'``).

    Parameters
    ----------
    time_str : str
        The lap time. Anything that is not a string (for example NaN for a
        missing value), an empty string, or the ``'\\N'`` null marker is
        treated as missing.

    Returns
    -------
    int or float
        Milliseconds as an ``int``; ``float('nan')`` for missing input.

    Raises
    ------
    ValueError
        If a field of a non-missing string is not a valid integer.
    """
    if not isinstance(time_str, str):
        return float('nan')
    text = time_str.strip()
    if not text or text == '\\N':
        return float('nan')

    # Everything before the last ':' is minutes (and optionally hours).
    *leading_fields, seconds_field = text.split(':')
    whole_seconds_total = 0
    for field in leading_fields:
        whole_seconds_total = whole_seconds_total * 60 + int(field)

    seconds_text, _, fraction_text = seconds_field.partition('.')
    whole_seconds_total = whole_seconds_total * 60 + int(seconds_text)

    # The fraction is a decimal fraction of a second, not a millisecond count:
    # '.5' means 500 ms. Right-pad to three digits and drop anything finer.
    millisecs = int((fraction_text + '000')[:3]) if fraction_text else 0
    return whole_seconds_total * 1000 + millisecs


# Convert qualifying times to milliseconds.
# '\N' marks a session the driver did not take part in. Those stay NaN:
# forward-filling would copy a different driver's lap time into the gap.
# na_action='ignore' passes NaN through without calling the converter.
qualifying_columns = ['q1', 'q2', 'q3']
qualifying_results_merged = qualifying_results_merged.assign(**{
    col: (
        qualifying_results_merged[col]
        .replace('\\N', np.nan)
        .map(time_to_milliseconds, na_action='ignore')
    )
    for col in qualifying_columns
})

# Create empty directed graph
G = nx.DiGraph()

# One node per driver, in order of first appearance.
driver_nodes = qualifying_results_merged.drop_duplicates('driverRef')
for driver_ref, nationality in zip(driver_nodes['driverRef'], driver_nodes['nationality_x']):
    G.add_node(driver_ref, node_type='driver', nationality=nationality)

# One node per team (added after drivers, so a clashing name ends up a team).
team_nodes = qualifying_results_merged.drop_duplicates('constructorRef')
for team_ref, nationality in zip(team_nodes['constructorRef'], team_nodes['nationality_y']):
    G.add_node(team_ref, node_type='team', nationality=nationality)

# Average qualifying time per (driver, team) pair:
#   1. per qualifying row, the mean of the sessions actually driven;
#   2. per pair, the mean of those row values.
# Calling add_edge once per row would keep only the last row's value,
# because each call overwrites the edge attribute.
pair_avg_times = (
    qualifying_results_merged
    .assign(session_avg_ms=qualifying_results_merged[qualifying_columns].mean(axis=1))
    .dropna(subset=['session_avg_ms'])
    .groupby(['driverRef', 'constructorRef'], sort=False)['session_avg_ms']
    .mean()
)

# Scale the averages linearly onto widths 1..10. With no data, min/max are
# NaN and the result is simply empty; with no spread, every edge gets width 1
# instead of dividing by zero.
max_qualifying_time = pair_avg_times.max()
min_qualifying_time = pair_avg_times.min()
qualifying_time_span = max_qualifying_time - min_qualifying_time
if pd.notna(qualifying_time_span) and qualifying_time_span > 0:
    scaled_widths = 1 + 9 * (pair_avg_times - min_qualifying_time) / qualifying_time_span
else:
    scaled_widths = pd.Series(1.0, index=pair_avg_times.index)

# Add one edge per pair, carrying both the average and its drawing width.
for (driver, team), avg_time, width in zip(pair_avg_times.index, pair_avg_times, scaled_widths):
    G.add_edge(driver, team, avg_qualifying_time=float(avg_time), edge_thickness=float(width))

# Per-edge values listed in G.edges() order, which the drawing call uses.
edge_data = [avg_time for _, _, avg_time in G.edges(data='avg_qualifying_time')]
edge_thicknesses = [width for _, _, width in G.edges(data='edge_thickness')]

# Define node colors by type: 'skyblue' for drivers, 'lightgreen' for teams
node_colors = ['skyblue' if G.nodes[node]['node_type'] == 'driver' else 'lightgreen' for node in G.nodes()]

# Spring layout; the fixed seed keeps the picture reproducible between runs.
pos = nx.spring_layout(G, k=0.9, iterations=50, seed=42)

# Draw the graph
plt.figure(figsize=(40, 40))  # large canvas so labels stay readable
nx.draw_networkx_nodes(G, pos, node_color=node_colors, alpha=0.8, node_size=5000)
nx.draw_networkx_edges(G, pos, edge_color='gray', width=edge_thicknesses, alpha=0.5)

# Labels with a white background box for legibility on top of edges.
nx.draw_networkx_labels(G, pos, font_size=14, font_weight='bold', bbox=dict(facecolor='white', edgecolor='none', alpha=0.8))

plt.title('Driver-Team Network with Edge Thickness Representing Average Qualifying Time')
plt.axis('off')  # Turn off the axis for better aesthetics
plt.show()

# Store each node's name as a 'label' attribute for external tools.
labels = {node: node for node in G.nodes()}
nx.set_node_attributes(G, labels, 'label')
# Save the graph (including 'edge_thickness' on every edge) as GraphML.
# NOTE(review): the pit-stop network cell writes to this same file name, so
# running this cell replaces that export - confirm this is intended.
nx.write_graphml(G, "network_graph.graphml")

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Inspect the merged qualifying/results frame: column names, dtypes and
# non-null counts.
qualifying_results_merged.info()

In [None]:
# Inspect the merged results/driver/constructor/race/pit-stop frame: column
# names (including the _x/_y suffixes added by the merges), dtypes and
# non-null counts.
results_constructor_driver_merged.info()

In [35]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Attach driver, race and constructor details to every race result.
results_driver_merged = pd.merge(results_data, driver_data, how='left', on='driverId')
results_driver_merged = pd.merge(results_driver_merged, race_data, how='left', on='raceId')
results_driver_merged_constructor = pd.merge(results_driver_merged, constructor_data, how='left', on='constructorId')
# Join qualifying sessions to the enriched results; the inner join keeps only
# driver/race combinations present in both tables.
race_results_merged = pd.merge(qualifying_data, results_driver_merged_constructor, how='inner', on=['driverId', 'raceId'])

pair_keys = ['driverRef', 'constructorRef']

# Average starting grid slot for each driver-team pair
avg_qualifying_positions = race_results_merged.groupby(pair_keys)['grid'].mean().reset_index()

# Number of races for each driver-team pair
race_count = race_results_merged.groupby(pair_keys).size().reset_index(name='race_count')

# Combine the average grid slot and the race count
quali_race_data = pd.merge(avg_qualifying_positions, race_count, on=pair_keys)

# Keep only the races where the driver finished ahead of their own starting
# grid slot in that race (positionOrder < grid).
# NOTE(review): rows with grid == 0 can never satisfy this - confirm how such
# rows should be treated.
better_finish = race_results_merged[race_results_merged['positionOrder'] < race_results_merged['grid']]

# Count those races for each driver-team pair
better_finish_count = better_finish.groupby(pair_keys).size().reset_index(name='better_finish_count')

# Left merge so pairs without a single better finish are kept ...
quali_race_data = pd.merge(quali_race_data, better_finish_count, on=pair_keys, how='left')

# ... and get a count of 0 instead of NaN.
quali_race_data['better_finish_count'] = quali_race_data['better_finish_count'].fillna(0)

# Create empty directed graph
G = nx.DiGraph()

# Add nodes for drivers and teams
for driver_ref, team_ref in zip(quali_race_data['driverRef'], quali_race_data['constructorRef']):
    G.add_node(driver_ref, node_type='driver')
    G.add_node(team_ref, node_type='team')

# Add one edge per driver-team pair. Its 'thickness' is the share of races in
# which the driver finished ahead of the grid slot, scaled to 0..10.
for pair in quali_race_data.itertuples(index=False):
    thickness = (pair.better_finish_count / pair.race_count) * 10 if pair.race_count > 0 else 0
    G.add_edge(pair.driverRef, pair.constructorRef, thickness=thickness)

# Define node colors by type: 'skyblue' for drivers, 'lightgreen' for teams
node_colors = ['skyblue' if G.nodes[node]['node_type'] == 'driver' else 'lightgreen' for node in G.nodes()]

# Spring layout; the fixed seed keeps the picture reproducible between runs.
pos = nx.spring_layout(G, k=0.9, iterations=50, seed=0)

# One width per edge, in G.edges() order. Passing the loop variable
# `thickness` would give every edge the width computed for the last pair.
edge_widths = [width for _, _, width in G.edges(data='thickness')]

# Draw the graph
plt.figure(figsize=(40, 40))  # large canvas so labels stay readable
nx.draw_networkx_nodes(G, pos, node_color=node_colors, alpha=0.8, node_size=5000)
nx.draw_networkx_edges(G, pos, edge_color='gray', width=edge_widths, alpha=0.5)

# Labels with a white background box for legibility on top of edges.
labels = nx.draw_networkx_labels(G, pos, font_size=14, font_weight='bold', bbox=dict(facecolor='white', edgecolor='none', alpha=0.8))

plt.title('Driver-Team Network with Edge Thickness Representing Average better finish')
plt.axis('off')  # Turn off the axis for better aesthetics
plt.show()


Output hidden; open in https://colab.research.google.com to view.