In [2]:
# --- File locations used by the odds-enrichment step below -----------------
# NOTE(review): these are machine-specific absolute paths (G: and D: drives);
# anyone running this elsewhere has to edit them first.

# Input: CSV holding the odds rows that get enriched with match results.
odds_path = r"G:\My Drive\dotabet\odds\the_odds.csv"

# Input: CSV holding the match records the odds rows are joined against.
matches_path = r"D:\WORKSPACE\dotabet\data\simple_matches.csv"

# Output: destination CSV for the enriched odds table.
output_file = r"G:\My Drive\dotabet\odds\the_odds_updated.csv"

In [4]:
import pandas as pd
from datetime import datetime

# Load the odds table and prepare its join keys in a single chain:
#   * Match_Date -> plain datetime.date, so it compares equal to the match date
#   * Team_1 / Team_2 -> trimmed and lower-cased names
#   * Teams -> frozenset of both names, so the order in which the two teams are
#     listed does not matter when joining
# NOTE(review): Match_Date is parsed without an explicit format while the
# matches file is parsed as day-first ('%d-%m-%Y'); confirm the odds file's
# date layout so both sides land on the same calendar day.
odds_df = pd.read_csv(odds_path).assign(
    Match_Date=lambda frame: pd.to_datetime(frame['Match_Date']).dt.date,
    Team_1=lambda frame: frame['Team_1'].str.strip().str.lower(),
    Team_2=lambda frame: frame['Team_2'].str.strip().str.lower(),
    Teams=lambda frame: frame.apply(
        lambda row: frozenset([row['Team_1'], row['Team_2']]), axis=1
    ),
)

# Same preparation for the match records: start_time becomes a plain date,
# radiant/dire names are normalized, and the unordered team pair is stored
# under the same 'Teams' column name used on the odds side.
matches_df = pd.read_csv(matches_path).assign(
    start_time=lambda frame: pd.to_datetime(
        frame['start_time'], format='%d-%m-%Y'
    ).dt.date,
    radiant_name=lambda frame: frame['radiant_name'].str.strip().str.lower(),
    dire_name=lambda frame: frame['dire_name'].str.strip().str.lower(),
    Teams=lambda frame: frame.apply(
        lambda row: frozenset([row['radiant_name'], row['dire_name']]), axis=1
    ),
)

# Determine the winning and losing teams.
#
# pandas.read_csv converts TRUE/FALSE cells into booleans, so comparing the
# column against the strings 'TRUE' / 'FALSE' never matches and every match
# would be credited to the dire side. Normalizing the flag to upper-case text
# first makes the comparison work whether the column arrives as booleans or
# as strings.
radiant_win_flag = matches_df['radiant_win'].astype(str).str.strip().str.upper()
radiant_won = radiant_win_flag.eq('TRUE')
radiant_lost = radiant_win_flag.eq('FALSE')

# Series.where keeps the radiant value where the mask holds and falls back to
# the dire value otherwise. As in the original branches, a flag that is
# neither TRUE nor FALSE (e.g. missing) resolves to the dire side in all three
# columns.
matches_df['win_team_name'] = matches_df['radiant_name'].where(radiant_won, matches_df['dire_name'])
matches_df['win_team_id'] = matches_df['radiant_team_id'].where(radiant_won, matches_df['dire_team_id'])
matches_df['lost_team_id'] = matches_df['radiant_team_id'].where(radiant_lost, matches_df['dire_team_id'])

# Attach match results to the odds rows. The join key is the unordered team
# pair plus the calendar date; a left join keeps every odds row even when no
# match record lines up with it (its match columns are then NaN).
# Note: an odds row is repeated once per matching match record, so several
# games between the same two teams on the same date yield several output rows.
combined_df = odds_df.merge(
    matches_df,
    how='left',
    left_on=['Teams', 'Match_Date'],
    right_on=['Teams', 'start_time'],
)

# Columns carried over from the odds file.
odds_columns = [
    'Date', 'League_Name', 'Team_1', 'Odd_1', 'Team_2', 'Odd_2', 'Match_Date',
]
# Columns that come from the match records; they are NaN for odds rows that
# found no corresponding match.
match_columns = [
    'match_id', 'duration', 'radiant_score', 'dire_score', 'radiant_win',
    'win_team_name', 'win_team_id', 'lost_team_id',
]
final_columns = odds_columns + match_columns

# Take an explicit copy so the fill below modifies an independent frame
# rather than a selection of combined_df (avoids SettingWithCopyWarning).
final_df = combined_df[final_columns].copy()

# Mark odds rows without a match record. Each result column is cast to object
# first so that a text marker can sit next to numeric values without relying
# on implicit dtype upcasting.
for column in match_columns:
    final_df[column] = final_df[column].astype(object)
    final_df.loc[final_df[column].isna(), column] = 'No Match Found'

# Save the updated DataFrame to a new CSV file
final_df.to_csv(output_file, index=False)
