## Odds Joining

The purpose of this notebook is to work out how to join the odds data (from [tennis-data.co.uk](http://www.tennis-data.co.uk/alldata.php)) with the parsed match data.  This is difficult because players' names are represented differently in the two sources, the parsed data doesn't have match dates, tournaments are named differently, and so on.  We will need to make various manual corrections to build a reliable basis for joining the two datasets.

#### Joining Odds Data to My Data

In [1]:
from pathlib import Path
from tennis_new.infra.defs import REPO_DIR

# Directory (inside the repo) that the per-year odds CSVs are read from.
ODDS_PATH = REPO_DIR.joinpath('fetch/odds_data/')

In [2]:
import pandas as pd
from tennis_new.fetch.get_joined import read_joined

# Season whose odds file is loaded into `odds_df`.
YEAR = 2006

# Single-season odds, all-seasons odds, and the parsed match data.
odds_df = pd.read_csv(ODDS_PATH.joinpath("%d.csv" % YEAR))
all_odds = pd.read_csv("./all_odds.csv")
jd = read_joined()

  interactivity=interactivity, compiler=compiler, result=result)
  if (yield from self.run_code(code, result)):


#### Name Processing

Let's normalise the players' names in both datasets so that we can join on them.

In [14]:
# TODO: Confirm names for a given player don't change over time?
#
# Manual spelling fixes for player names in the odds data.  Keys are the
# upper-cased names as they appear in the odds files; values are the spelling we
# want to use instead.  The last name used for joining is everything except the
# final space-separated token (see `last_name_odds`), so what matters most is
# that the part before the initials matches the last name derived from `jd`.
# Variants of the same player should map to one identical string.
ODDS_NAME_CORRECTIONS = {
    'DEL POTRO J. M.': 'DEL POTRO J.M.',
    'GAMBILL J. M.': 'GAMBILL J.M.',
    'QUERRY S.': 'QUERREY S.',
    'BAUTISTA AGUT R.': 'BAUTISTA R.',
    'BOGOMOLOV JR. A.': 'BOGOMOLOV A.',
    'RAMIREZ HIDALGO R.': 'RAMIREZ-HIDALGO R.',
    'CARRENO BUSTA P.': 'CARRENO-BUSTA P.',
    'MUNOZ-DE LA NAVA D.': 'MUNOZ-DE-LA-NAVA D.',
    'MUNOZ DE LA NAVA D.': 'MUNOZ-DE-LA-NAVA D.',
    'DEL BONIS F.': 'DELBONIS F.',
    'HANTSCHEK M.': 'HANTSCHK M.',
    'HAIDER-MAUER A.': 'HAIDER-MAURER A.',
    'DE HEART R.': 'DEHEART R.',
    'MATSUKEVITCH D.': 'MATSUKEVICH D.',
    'NADAL-PARERA R.': 'NADAL R.',
    'AL GHAREEB M.': 'GHAREEB M.',
    "AL-GHAREEB M.": "GHAREEB M.",
    'WANG Y. JR': 'WANG-JR. Y.',
    # Both spellings of this player normalise to the same "J.A." form.
    "SANCHEZ DE LUNA J.": "SANCHEZ-DE-LUNA J.A.",
    "SANCHEZ DE LUNA J.A.": "SANCHEZ-DE-LUNA J.A.",
    "DEV VARMAN S.": "DEVVARMAN S.",
    "GRANOLLERS-PUJOL M.": "GRANOLLERS M.",
    "GRANOLLERS PUJOL G.": "GRANOLLERS G.",
    "GRANOLLERS-PUJOL G.": "GRANOLLERS G.",
    "BAHROUZYAN O.": "AWADHY O.",
    "ALI MUTAWA J.M.": "AL-MUTAWA J.M.",
    "AL MUTAWA J.": "AL-MUTAWA J.",
    "ZAYID M. S.": "ZAYID M.S.",
    "GALLARDO VALLES M.": "GALLARDO-VALLES M.",
    "RIBA-MADRID P.": "RIBA P.",
    "CHEKOV P.": "CHEKHOV P.",
    "SAAVEDRA CORVALAN C.": "SAAVEDRA-CORVALAN C.",
    "HAJI A.": "HAJJI A.",
    "ZAYED M. S.": "ZAYED M.S.",
    "KUNITCIN I.": "KUNITSYN I.",
    "DEEN HESHAAM A.": "DEEN-HESHAAM A.",
    "ESTRELLA V.": "ESTRELLA-BURGOS V.",
    "SCHUTTLER P.": "SCHUETTLER R.",
    "TYURNEV E.": "TIURNEV E.",
    "SULTAN-KHALFAN A.": "KHALFAN S.",
    "VAN D. MERWE I.": "VAN DER MERWE I.",
    "ALAWADHI O.": "AWADHY O.",
    "RASCON T.": "RASCON-LOPE J.T.",
    "RUEVSKI P.": "RUSEVSKI P.",
    "ESTRELLA BURGOS V.": "ESTRELLA-BURGOS V.",
    "VAN DER DIUM A.": "VAN DER DUIM A.",
    "AL KHULAIFI N.G.": "AL-KHULAIFI N.G."
}

# Whole-name replacements for the parsed (`jd`) data.  They are looked up with
# the upper-cased full player name, before any last name is derived from it.
JD_WHOLE_NAME_CORRECTIONS = {
    'IVAN NAVARRO': 'IVAN NAVARRO-PASTOR',
    'DANIEL MUNOZ DE LA NAVA': 'DANIEL MUNOZ-DE-LA-NAVA',
    'MIGUEL ANGEL LOPEZ JAEN': 'MIGUEL-ANGEL LOPEZ-JAEN',
    'YU JR. WANG': 'YU WANG-JR.',
    'OMAR ALAWADHI': 'OMAR AWADHY',
    'MIGUEL ANGEL REYES-VARELA': 'MIGUEL-ANGEL REYES-VARELA',
    'ISRAEL MATOS GIL': 'ISRAEL MATOS-GIL',
    'ARIEZ ELYAAS DEEN HESHAAM': 'ARIEZ-ELYAAS DEEN-HESHAAM',
    'ENRIQUE LOPEZ PEREZ': 'ENRIQUE LOPEZ-PEREZ',
    'VICTOR ESTRELLA BURGOS': 'VICTOR ESTRELLA-BURGOS',
    'MAHMOUD-NADER AL BALOUSHI': 'MAHMOUD NADER',
}


# Replacements applied to the last name derived from a `jd` player name
# (i.e. after `last_name_jd` has dropped the first token).
JD_NAME_CORRECTIONS = {
    'MARTIN DEL POTRO': 'DEL POTRO',
    'IGNACIO LONDERO': 'LONDERO',
    'FERREIRA SILVA': 'SILVA',
    'ELAHI GALAN': 'GALAN',
    'CARLOS FERRERO': 'FERRERO',
    'IGNACIO CHELA': 'CHELA',
    'ALBERT VILOCA-PUIG': 'VILOCA',
    'BURRIEZA-LOPEZ': 'BURRIEZA',
    'BOGOMOLOV JR.': 'BOGOMOLOV',
    # "KHALFAN": "AL-ALAWI",
    # "ALAWADHI": "BAHROUZYAN",
    'BAUTISTA AGUT': 'BAUTISTA',
    'RAMIREZ HIDALGO': 'RAMIREZ-HIDALGO',
    'VASSALLO ARGUELLO': 'VASSALLO-ARGUELLO',
    'CARRENO BUSTA': 'CARRENO-BUSTA',
    'PABLO BRZEZICKI': 'BRZEZICKI',
    'TRUJILLO-SOLER': 'TRUJILLO',
    'MARCO MORONI': 'MORONI',
    'SALVA-VIDAL': 'SALVA',
    'ANTONIO SANCHEZ-DE LUNA': 'SANCHEZ-DE-LUNA',
    'SEBASTIAN CABAL': 'CABAL',
    'SHANNAN ZAYID': 'ZAYID',
    'SHANAN ZAYED': 'ZAYED',
    'VIJAY SUNDAR PRASHANTH': 'PRASHANTH',
    'DON GRUBER': 'GRUBER',
    'PAUL FRUTTERO': 'FRUTTERO',
    'LUQUE-VELASCO': 'LUQUE',
}

In [15]:
# Alphabetical list of the columns available in the single-season odds file.
sorted(odds_df.columns)

['ATP',
 'B365L',
 'B365W',
 'Best of',
 'CBL',
 'CBW',
 'Comment',
 'Court',
 'Date',
 'EXL',
 'EXW',
 'L1',
 'L2',
 'L3',
 'L4',
 'L5',
 'LPts',
 'LRank',
 'Location',
 'Loser',
 'Lsets',
 'PSL',
 'PSW',
 'Round',
 'Series',
 'Surface',
 'Tournament',
 'UBL',
 'UBW',
 'W1',
 'W2',
 'W3',
 'W4',
 'W5',
 'WPts',
 'WRank',
 'Winner',
 'Wsets']

In [16]:
# Upper-case every player-name column, then apply that dataset's manual
# whole-name corrections (names without a correction are left as they are).
for frame, name_columns, corrections in (
    (jd, ('winner_name', 'loser_name'), JD_WHOLE_NAME_CORRECTIONS),
    (all_odds, ('Winner', 'Loser'), ODDS_NAME_CORRECTIONS),
):
    for column in name_columns:
        upper_cased = frame[column].map(lambda raw: raw.upper())
        frame[column] = upper_cased.map(lambda name: corrections.get(name, name))

In [17]:
def last_name_jd(n):
    """Upper-case and trim `n`, then return everything after its first space.

    A name without any space yields the empty string.
    """
    _first, _sep, remainder = n.upper().strip().partition(' ')
    return remainder

def last_name_odds(n):
    """Upper-case and trim `n`, then return everything before its last space.

    This drops the trailing token (the initials in names such as "FEDERER R.");
    a name without any space yields the empty string.
    """
    leading, _sep, _last_token = n.upper().strip().rpartition(' ')
    return leading

# Derive each jd player's last name, then apply the manual last-name corrections.
for side in ('winner', 'loser'):
    jd[side + '_last_name'] = (
        jd[side + '_name']
        .map(last_name_jd)
        .map(lambda last: JD_NAME_CORRECTIONS.get(last, last))
    )

In [18]:
# Same derivation for the odds data: last name = name minus its trailing token.
for name_col, last_name_col in (
    ('Winner', 'winner_last_name'),
    ('Loser', 'loser_last_name'),
):
    all_odds[last_name_col] = all_odds[name_col].map(last_name_odds)

In [20]:
# Odds winners whose last name never occurs among jd's winner last names,
# counted by their full odds-file name.  An empty result means every name matched.
winner_unmatched = ~all_odds['winner_last_name'].isin(jd['winner_last_name'])
top_missing_winners = all_odds['Winner'][winner_unmatched].value_counts()
top_missing_winners

Series([], Name: Winner, dtype: int64)

In [22]:
# Odds losers whose last name never occurs among jd's loser last names,
# counted by their full odds-file name.  An empty result means every name matched.
loser_unmatched = ~all_odds['loser_last_name'].isin(jd['loser_last_name'])
top_missing_losers = all_odds['Loser'][loser_unmatched].value_counts()
top_missing_losers

Series([], Name: Loser, dtype: int64)

In [10]:
# Substring of a last name to look up in both datasets in the cells below.
TO_INVESTIGATE = 'KHULAIFI'

In [11]:
# How the odds data spells losers whose last name contains TO_INVESTIGATE.
all_odds['Loser'][all_odds['loser_last_name'].map(lambda x: TO_INVESTIGATE in x)].value_counts()

AL KHULAIFI N.G.    1
Name: Loser, dtype: int64

In [12]:
# First few odds rows (names, tournament, date, first two set scores) whose
# loser last name contains TO_INVESTIGATE -- context for finding the jd match.
all_odds[[
    'Winner',
    'winner_last_name',
    'Loser',
    'loser_last_name',
    'Tournament',
    'Location',
    'Date',
    'W1',
    'L1',
    'W2',
    'L2'
]][all_odds['loser_last_name'].map(lambda x: TO_INVESTIGATE in x)].head()

Unnamed: 0,Winner,winner_last_name,Loser,loser_last_name,Tournament,Location,Date,W1,L1,W2,L2
72,OGORODOV O.,OGORODOV,AL KHULAIFI N.G.,AL KHULAIFI,Qatar Open,Doha,12/31/01,6.0,1.0,6.0,2.0


In [13]:
# Distinct (full name, last name) pairs in jd whose loser last name contains
# TO_INVESTIGATE, to compare against the odds spelling found above.
jd[[
    'loser_name',
    'loser_last_name',
]][jd['loser_last_name'].map(lambda x: TO_INVESTIGATE in x)].drop_duplicates(
    ['loser_name', 'loser_last_name']
)

Unnamed: 0,loser_name,loser_last_name
144402,NASSER-GHANIM AL-KHULAIFI,AL-KHULAIFI


#### Location Mapping

#### Tournament Mapping

The tournament names are presented differently in the two datasets as well.  We will have to map each tournament name in the odds data to the corresponding tournament title(s) in the parsed data before we can join on them.

In [None]:
# Upper-case the tournament names on both sides so that case differences cannot
# break the comparison; the odds names also lose surrounding spaces.
jd['tourney_title'] = jd['tourney_title'].map(lambda title: title.upper())
odds_df['Tournament'] = odds_df['Tournament'].map(lambda name: name.upper().strip(' '))

In [None]:
# Most frequent odds tournament names that do not appear verbatim among jd's titles.
odds_df['Tournament'][~odds_df['Tournament'].isin(jd['tourney_title'])].value_counts().head()

In [None]:
pd.set_option('display.max_rows', 100, 'display.max_columns', None) # more options can be specified also


def _tournament_summary(group):
    """Row count and smallest 'Date' value of one (Tournament, Location) group."""
    # NOTE(review): if 'Date' is still a string here, min() is lexicographic
    # rather than chronological -- confirm the dtype.
    return pd.Series({
        'n_matches': group.shape[0],
        'min_date': group['Date'].min()
    })


# One row per (Tournament, Location) pair in the odds data, largest first.
per_tournament = odds_df.groupby(['Tournament', 'Location']).apply(_tournament_summary)
wubba = per_tournament.sort_values('n_matches', ascending=False)
wubba

In [None]:
def tourney_stats(s):
    """Print row counts and date ranges for ATP tournaments whose title contains `s`.

    Only rows of `jd` with tour_type 'atp' are considered.  Two things are
    printed: the number of rows per matching title, and per title the
    (min, max) of its `tourney_dates` values.
    """
    is_atp = jd['tour_type'] == 'atp'
    title_contains = jd['tourney_title'].map(lambda title: s in title)
    tourney_titles = jd[title_contains & is_atp]['tourney_title'].unique()
    rel = jd[jd['tourney_title'].isin(tourney_titles) & is_atp]

    print(rel['tourney_title'].value_counts())
    date_ranges = rel.groupby('tourney_title').apply(
        lambda group: (group['tourney_dates'].min(), group['tourney_dates'].max())
    )
    print(date_ranges)


tourney_stats("BNP")

In [None]:
# To help with joining odds data...
#
# Keys are upper-cased tournament names as they appear in the odds data.
# Each value is either a single jd `tourney_title` (a plain string) or a list
# of candidate titles for the same event; `get_tourney` picks the candidate at
# a given position, and the join is attempted once per position.
TOURNAMENT_MAPPING = {
    'BRISBANE INTERNATIONAL': ['BRISBANE', 'BRISBANE INTERNATIONAL'],
    'US OPEN': 'US OPEN',
    'WIMBLEDON': 'WIMBLEDON',
    'FRENCH OPEN': 'ROLAND GARROS',
    'AUSTRALIAN OPEN': ['AUSTRALIAN OPEN',  'AUSTRALIAN OPEN-2'],
    'SONY ERICSSON OPEN': [
        'ATP MASTERS 1000 MIAMI',
        'MIAMI OPEN PRESENTED BY ITAU',
        'MIAMI'
    ],
    "BNP PARIBAS OPEN": ['BNP PARIBAS OPEN', 'ATP MASTERS 1000 INDIAN WELLS', 'INDIAN WELLS'],
    "BNP PARIBAS MASTERS": ['BNP PARIBAS MASTERS', 'ATP MASTERS 1000 PARIS', 'PARIS'],
    "INTERNAZIONALI BNL D'ITALIA": [
        "INTERNAZIONALI BNL D'ITALIA",
        "ATP MASTERS 1000 ROME",
        "ROME",
    ],
    "WESTERN & SOUTHERN FINANCIAL GROUP MASTERS": ["WESTERN & SOUTHERN OPEN", "ATP MASTERS 1000 CINCINNATI", "CINCINNATI"],
    "AEGON CHAMPIONSHIPS": "LONDON / QUEEN'S CLUB",
    "MONTE CARLO MASTERS": "ATP MASTERS 1000 MONTE CARLO",
    "MUTUA MADRID OPEN": ["ATP MASTERS 1000 MADRID", "MUTUA MADRID OPEN"],
    "SHANGHAI MASTERS": ["ATP MASTERS 1000 SHANGHAI", "SHANGHAI"],
    "ROGERS MASTERS": ["ATP MASTERS 1000 CANADA", "COUPE ROGERS"],
    "CITI OPEN": ["WASHINGTON", "CITI OPEN"],
    "GERMAN TENNIS CHAMPIONSHIPS": [
        "GERMAN OPEN TENNIS CHAMPIONSHIPS",
        "HAMBURG",
        "ATP MASTERS 1000 HAMBURG",
        "HAMBURG EUROPEAN OPEN"
    ],
    "OPEN BANCO SABADELL": [
        "BARCELONA OPEN BANC SABADELL",
        "BARCELONA",
    ],
    "SWISS INDOORS": "BASEL",
    "RAKUTEN JAPAN OPEN TENNIS CHAMPIONSHIPS": ["RAKUTEN JAPAN OPEN TENNIS CHAMPIONSHIPS", "TOKYO"],
    "QATAR EXXON MOBIL OPEN": ["QATAR EXXONMOBIL OPEN", "DOHA"],
    "GRAND PRIX HASSAN II": "MARRAKECH",
    "GERRY WEBER OPEN": "HALLE",
    "CHINA OPEN": ["CHINA OPEN", "BEIJING"],
    "ABIERTO MEXICANO": ["ABIERTO MEXICANO TELCEL PRESENTADO POR HSBC", "ACAPULCO"],
    "DELRAY BEACH OPEN": ["DELRAY BEACH", "DELRAY BEACH OPEN BY VITACOST.COM"],
    "DUBAI TENNIS CHAMPIONSHIPS": ["DUBAI", "DUBAI DUTY FREE TENNIS CHAMPIONSHIPS"],
    "ERSTE BANK OPEN": ["VIENNA", "ERSTE BANK OPEN"],
    "ARGENTINA OPEN": ["BUENOS AIRES", "ARGENTINA OPEN"],
    "RICOH OPEN": ["S-HERTOGENBOSCH", "'S-HERTOGENBOSCH"],
    "EASTBOURNE INTERNATIONAL": ["EASTBOURNE", "NATURE VALLEY INTERNATIONAL"],
    "BB&T ATLANTA OPEN": ["BB&T ATLANTA OPEN", "ATLANTA"],
    "ASB CLASSIC": ["ASB CLASSIC", "AUCKLAND"],
    "SHENZHEN OPEN": ["SHENZHEN"],
    "SKISTAR SWEDISH OPEN": ["BASTAD", "SWEDISH OPEN"],
    "STOCKHOLM OPEN": ["STOCKHOLM", "INTRUM STOCKHOLM OPEN"],
    "ST. PETERSBURG OPEN": ["ST. PETERSBURG", "ST. PETERSBURG OPEN"],
    "SUISSE OPEN GSTAAD": ["GSTAAD", "J. SAFRA SARASIN SWISS OPEN GSTAAD"],
    "SYDNEY INTERNATIONAL": ["SYDNEY INTERNATIONAL", "SYDNEY"],
    "TATA OPEN": ["TATA OPEN MAHARASHTRA", "PUNE"],
    "U.S. MEN'S CLAY COURT CHAMPIONSHIPS": ["HOUSTON", "FAYEZ SAROFIM & CO. U.S. MEN'S CLAY COURT CHAMPIONSHIP"],
    "ANTALYA OPEN": ["ANTALYA", "TURKISH AIRLINES OPEN ANTALYA"],
    "CROATIA OPEN": ["PLAVA LAGUNA CROATIA OPEN UMAG", "UMAG"],
    "ECUADOR OPEN": "QUITO",
    "GAZPROM HUNGARIAN OPEN": ["HUNGARIAN OPEN", "BUDAPEST"],
    "GENERALI OPEN": ["GENERALI OPEN", "KITZBUHEL", "KITZBÜHEL", "KITZBUEHEL"],
    "OPEN DE MOSELLE": ["MOSELLE OPEN", "METZ"],
    "GENEVA OPEN": ["GENEVA", "BANQUE ERIC STURDZA GENEVA OPEN"],
    "GARANTI KOZA SOFIA OPEN": ["SOFIA", "SOFIA OPEN"],
    "EUROPEAN OPEN": ["ANTWERP", "EUROPEAN OPEN"],
    "HALL OF FAME CHAMPIONSHIPS": ["NEWPORT", "HALL OF FAME OPEN"],
    "ABIERTO MEXICANO MIFEL": ["LOS CABOS", "ABIERTO DE TENIS MIFEL PRESENTADO POR CINEMEX"],
    "ISTANBUL OPEN": "ISTANBUL",
    "KREMLIN CUP": ["MOSCOW", "VTB KREMLIN CUP"],
    "LYON OPEN": ["LYON", "OPEN PARC AUVERGNE-RHONE-ALPES LYON"],
    "MERCEDES CUP": ["STUTTGART", "ATP MASTERS 1000 STUTTGART", "MERCEDESCUP"],
    "MILLENNIUM ESTORIL OPEN": ["ESTORIL", "MILLENNIUM ESTORIL OPEN"],
    "BRASIL OPEN": ["SAO PAULO", "BRASIL OPEN"],
    "CHENGDU OPEN": ["CHENGDU", "CHENGDU OPEN"],
    "NEW YORK OPEN": ["NEW YORK", "NEW YORK OPEN"],
    "OPEN 13": ["OPEN 13 PROVENCE", "MARSEILLE"],
    "BMW OPEN": ["MUNICH", "BMW OPEN BY FWU"],
    "OPEN SUD DE FRANCE": ["OPEN SUD DE FRANCE", "MONTPELLIER"],
    "MASTERS CUP": ["TENNIS MASTERS CUP", "NITTO ATP FINALS"],
    "RIO OPEN": ["RIO DE JANEIRO", "RIO OPEN PRESENTED BY CLARO"],
    "ABN AMRO WORLD TENNIS TOURNAMENT": ["ROTTERDAM", "ABN AMRO WORLD TENNIS TOURNAMENT"],
    "WINSTON-SALEM OPEN AT WAKE FOREST UNIVERSITY": ['WINSTON-SALEM', 'WINSTON-SALEM OPEN']
}

#### Score Parsing

In [None]:
# Substrings that mark a set entry as a walkover.
WALKOVER_DEFS = [
    'W/O',
    'DEF'
]


def parse_set_score(s):
    """Turn one set's score text into a (winner_games, loser_games) pair.

    Special entries come back as a pair of identical marker strings:
    'RETIRE' when the text contains 'RET', 'WALKOVER' when it contains any of
    WALKOVER_DEFS, 'MATCH_NOT_PLAYED' when it contains 'UNP'.

    Otherwise the text is trimmed of the characters ' ', '(', 'N', 'A', ')' at
    both ends and read as two concatenated integers.  Two characters are taken
    as one digit each; longer text is split at the first position where the two
    halves differ by at most 2 (so '1012' becomes (10, 12)).  If no such split
    exists, (None, None) is returned.
    """
    if 'RET' in s:
        return ('RETIRE', 'RETIRE')
    if any(marker in s for marker in WALKOVER_DEFS):
        return ('WALKOVER', 'WALKOVER')
    if 'UNP' in s:
        return ('MATCH_NOT_PLAYED', 'MATCH_NOT_PLAYED')

    # str.strip with an argument removes any of these characters from both ends.
    digits = s.strip(' (NA)')
    if len(digits) == 2:
        return int(digits[0]), int(digits[1])

    for split_at in range(1, len(digits)):
        winner_games = int(digits[:split_at])
        loser_games = int(digits[split_at:])
        if abs(winner_games - loser_games) <= 2:
            return winner_games, loser_games
    return None, None

In [None]:
def parse_match_score(s):
    """Parse a ';'-separated match score into a dict of per-set columns.

    A null score gives an empty dict.  Otherwise the n-th set (counting from 1)
    contributes the keys 'W<n>' and 'L<n>', holding the two values returned by
    `parse_set_score` for that set.
    """
    if pd.isnull(s):
        return {}
    out = {}
    for set_number, set_text in enumerate(s.split(';'), start=1):
        winner_value, loser_value = parse_set_score(set_text)
        out['W%d' % set_number] = winner_value
        out['L%d' % set_number] = loser_value
    return out

In [None]:
# One row of per-set columns for every jd row.  Built on jd's own index: without
# it the frame would get a fresh 0..n-1 index, and attaching it to jd with an
# axis=1 concat aligns rows by index label, not by position.
score_df = pd.DataFrame(jd['score'].map(parse_match_score).tolist(), index=jd.index)

In [None]:
# Quick look at the parsed per-set columns.
score_df.head()

In [None]:
# Attach the per-set columns to jd.  Any score columns left over from an earlier
# run of this cell are dropped first, so re-running it does not leave jd with
# duplicate column names.
jd = pd.concat(
    [jd.drop(columns=score_df.columns, errors='ignore'), score_df],
    axis=1
)
jd.shape

In [None]:
# Per-set score columns for up to five sets: W1..W5 followed by L1..L5.
score_cols = [
    '%s%d' % (side, set_number)
    for side in ('W', 'L')
    for set_number in range(1, 6)
]

#### Joining

In [None]:
# Stable row id, used after the merge to see which odds rows found no match.
odds_df['odds_match_id'] = range(odds_df.shape[0])

# The merge joins on winner/loser last names, which the odds CSV does not
# contain.  Derive them here exactly as was done for `all_odds` (upper-case,
# manual corrections, drop the trailing initials) so this frame can be joined
# on a fresh kernel.  The 'Winner'/'Loser' columns themselves are left untouched.
for name_col, last_name_col in (
    ('Winner', 'winner_last_name'),
    ('Loser', 'loser_last_name'),
):
    odds_df[last_name_col] = (
        odds_df[name_col]
        .map(lambda x: x.upper())
        .map(lambda x: ODDS_NAME_CORRECTIONS.get(x, x))
        .map(last_name_odds)
    )

In [None]:
# jd rows for the same season as the odds file loaded into `odds_df` (YEAR).
# The variable keeps its historical name `jd_2018` because later cells refer to
# it; it holds whichever season YEAR selects.
jd_2018 = jd[jd['year'] == YEAR].copy()

In [None]:
# Columns both frames are merged on: player last names, tournament title and
# every per-set score column, in sorted order.
join_cols = sorted(
    score_cols + ['winner_last_name', 'loser_last_name', 'tourney_title']
)

In [None]:
def get_tourney(x, idx):
    """Return the `idx`-th candidate jd title for odds tournament `x`, or None.

    `x` is looked up in TOURNAMENT_MAPPING.  A string entry is the only
    candidate, so it is returned for idx 0 only; a list entry is indexed by
    `idx`.  None is returned when the candidates are exhausted, and also when
    `x` has no entry at all -- an unmapped tournament then simply fails to join
    (and shows up among the unmatched odds rows) instead of aborting the whole
    merge with a KeyError.
    """
    mapped = TOURNAMENT_MAPPING.get(x)
    if mapped is None:
        return None
    if isinstance(mapped, str):
        return mapped if idx == 0 else None
    if idx >= len(mapped):
        return None
    return mapped[idx]

# Try the join once per candidate position: round `idx` labels every odds row
# with the idx-th candidate title for its tournament and merges on the full key.
# The loop ends at the first round in which no tournament has a candidate left.
merged_dfs = []
idx = 0
while True:
    print(idx)
    cur_odds = odds_df.copy()
    candidate_titles = odds_df['Tournament'].map(lambda name: get_tourney(name, idx))
    cur_odds['tourney_title'] = candidate_titles
    if cur_odds['tourney_title'].isnull().all():
        break
    round_matches = pd.merge(jd_2018, cur_odds, on=join_cols)
    merged_dfs.append(round_matches)
    idx += 1

In [None]:
# Stack the matches found in every round into a single frame.
all_merged = pd.concat(merged_dfs)

In [None]:
# Persist the joined rows.
# NOTE(review): the file name hard-codes 2018 while the odds file is chosen by
# YEAR -- confirm the two are meant to agree before relying on this file.
all_merged.to_csv("./merged_2018.csv", index=False)

In [None]:
# Sanity check: no match_id may occur more than once in the merged frame, i.e.
# no single match was joined to several odds rows.
assert all_merged['match_id'].value_counts().max() == 1

In [None]:
# Odds rows whose id does not occur anywhere in the merged result.
matched_ids = all_merged['odds_match_id']
missing = odds_df[~odds_df['odds_match_id'].isin(matched_ids)]

In [None]:
# How many odds rows failed to join (rows, columns).
missing.shape

In [None]:
# Breakdown of the unmatched odds rows by their 'Comment' value.
missing['Comment'].value_counts()

In [None]:
# Names on the unmatched rows whose Comment is 'Completed'.
missing[missing['Comment'] == 'Completed'][[
    'Winner', 'Loser', 'winner_last_name', 'loser_last_name'
]]

In [None]:
# What tournaments are ALWAYS missing!?
# i.e. tournaments of unmatched odds rows that never appear in the merged result,
# with the number of unmatched rows for each.
missing[~missing['Tournament'].isin(all_merged['Tournament'])]['Tournament'].value_counts()

In [None]:
# Inspect the first unmatched odds row of one such tournament in full.
missing[missing['Tournament'] == "CHENGDU OPEN"].iloc[0]

In [None]:
# Spot-check the raw mapping entry for one tournament.
TOURNAMENT_MAPPING['FRENCH OPEN']

In [None]:
# ... and the first candidate title that get_tourney returns for it.
get_tourney('FRENCH OPEN', 0)

In [None]:
def inspect_match(w=None, l=None):
    """Show the join-key columns of the `jd_2018` rows for a winner and/or loser.

    w: winner last name to match exactly against `winner_last_name`
       (None = do not filter on the winner).
    l: loser last name to match exactly against `loser_last_name`
       (None = do not filter on the loser).

    Returns the matching rows restricted to winner_last_name, loser_last_name,
    tourney_title and the per-set score columns.
    """
    rel = jd_2018
    if w is not None:
        rel = rel[rel['winner_last_name'] == w]
    if l is not None:
        # Filter on the loser argument `l` (not on `w`).
        rel = rel[rel['loser_last_name'] == l]
    return rel[[
        'winner_last_name',
        'loser_last_name',
        'tourney_title'
    ] + score_cols]


inspect_match('POLANSKY')

In [None]:
# Merged matches for which both the winner's and the loser's rank are known.
has_both_ranks = all_merged['WRank'].notnull() & all_merged['LRank'].notnull()
ranked = all_merged[has_both_ranks]

In [None]:
# Share of ranked matches in which WRank is numerically smaller than LRank
# (presumably: the better-ranked player won -- confirm rank semantics).
(ranked['WRank'] < ranked['LRank']).mean()

In [None]:
# Share of ranked matches in which the winner's B365W value was not larger than
# the loser's B365L value (ties count as a hit here).
(ranked['B365W'] <= ranked['B365L']).mean()

In [None]:
# Same comparison with a strict inequality, so ties no longer count as a hit.
(ranked['B365W'] < ranked['B365L']).mean()

In [None]:
import numpy as np

# For every jd row that has a score, count in how many of the (up to five) sets
# the W<n> value is greater than the L<n> value.
with_scores = jd[jd['score'].notnull()]
winner_sets = np.zeros(with_scores.shape[0])
for set_index in range(1, 6):
    winner_col = 'W%d' % set_index
    loser_col = 'L%d' % set_index
    set_won = with_scores[winner_col] > with_scores[loser_col]
    winner_sets += set_won.astype(int)

In [None]:
# Distribution of the number of sets counted for the winner.
pd.Series(winner_sets).value_counts()

In [None]:
# Rows where the winner is credited with exactly one set, for manual inspection.
with_scores[winner_sets == 1][['score', 'tourney_url_suffix']]

In [None]:
# Players involved in matches whose score text contains 'W/O'.
with_scores[with_scores['score'].map(lambda x: 'W/O' in x)][['winner_name', 'loser_name']]

In [None]:
# Display the scored rows (the notebook front end truncates the rendering).
with_scores