In [None]:
# File locations used by this notebook.
# Input: CSV of odds rows, loaded into `odds_df`.
odds_path = r"G:\My Drive\dotabet\odds\the_odds.csv"
# Input: CSV of match rows, loaded into `matches_df`.
matches_path = r"D:\WORKSPACE\dotabet\data\simple_matches.csv"
# Output: CSV the merged odds + match details are written to.
output_file = r"G:\My Drive\dotabet\odds\the_odds_updated.csv"

In [None]:
import pandas as pd
from datetime import datetime

# Enrich each odds row with the details of the match played between the same
# two teams on the same calendar date, then write the result to `output_file`.

# Load the datasets
odds_df = pd.read_csv(odds_path)
matches_df = pd.read_csv(matches_path)

# Reduce both date columns to plain `datetime.date` values so the merge can
# compare them for equality. `start_time` is parsed as day-month-year.
odds_df['Match_Date'] = pd.to_datetime(odds_df['Match_Date']).dt.date
matches_df['start_time'] = pd.to_datetime(matches_df['start_time'], format='%d-%m-%Y').dt.date

# Ensure team names are treated case-insensitively and as trimmed strings
odds_df['Team_1'] = odds_df['Team_1'].str.strip().str.lower()
odds_df['Team_2'] = odds_df['Team_2'].str.strip().str.lower()
matches_df['radiant_name'] = matches_df['radiant_name'].str.strip().str.lower()
matches_df['dire_name'] = matches_df['dire_name'].str.strip().str.lower()

# Build an order-independent key for each pairing: a frozenset is hashable and
# ignores which side a team was listed on. A zip-based comprehension is used
# instead of a row-wise `apply`, which returns a DataFrame (not a Series) for
# an empty frame and would make the column assignment fail.
odds_df['Teams'] = [
    frozenset(pair) for pair in zip(odds_df['Team_1'], odds_df['Team_2'])
]
matches_df['Teams'] = [
    frozenset(pair)
    for pair in zip(matches_df['radiant_name'], matches_df['dire_name'])
]

# Left merge keeps every odds row; rows without a match on the same date and
# team pairing get NaN in the match columns.
combined_df = pd.merge(
    odds_df,
    matches_df,
    left_on=['Teams', 'Match_Date'],
    right_on=['Teams', 'start_time'],
    how='left'
)

# Columns taken from the matches file; these are NaN when no match was found.
match_detail_columns = ['match_id', 'duration', 'radiant_score', 'dire_score', 'radiant_win']

# Select the columns to keep in the final output
final_columns = [
    'Date', 'League_Name', 'Team_1', 'Odd_1', 'Team_2', 'Odd_2', 'Match_Date',
] + match_detail_columns

# Take an explicit copy: assigning into a bare column selection of
# `combined_df` triggers SettingWithCopyWarning and leaves it ambiguous which
# object is modified.
final_df = combined_df[final_columns].copy()

# Fill NaN match details with a marker value if no match is found
final_df[match_detail_columns] = final_df[match_detail_columns].fillna('No Match Found')

# Save the updated DataFrame to a new CSV file
final_df.to_csv(output_file, index=False)
