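Let's assume that the first time a player appears in the roster data is when they were 'recruited'. Whenever the same player later shows up on a different team, let's make that connection (the move from the earlier team to the new one) show up.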

In [2]:
import pandas as pd

In [3]:
# Raw roster export, one row per roster entry; the path is relative to the
# notebook's working directory.
ROSTER_CSV = 'team_roster_data_untouched.csv'
df = pd.read_csv(ROSTER_CSV)

In [4]:
# Show which columns are available, then discard rows that lack a first or
# last name and renumber the surviving rows so the index is a gap-free
# 0..n-1 range again.
print(df.columns)
df = (
    df
    .dropna(subset=["firstName", "lastName"])
    .reset_index(drop=True)
)

Index(['id', 'firstName', 'lastName', 'team', 'weight', 'height', 'jersey',
       'year', 'position', 'homeCity', 'homeState', 'homeCountry',
       'homeLatitude', 'homeLongitude', 'homeCountyFIPS', 'recruitIds'],
      dtype='object')


In [5]:
# Build one record per player id:
#     p[id] = {"name": "<first> <last>", "teams": [team, team, ...]}
# "name" comes from the first row seen for that id. "teams" receives one entry
# per roster row, in row order, so a team is repeated when an id has several
# rows for it.
p = {}

# Walk the four needed columns in lockstep instead of doing four
# `df.loc[i, col]` scalar lookups per row: this is much faster on a large
# frame and does not depend on the index labels being exactly 0..n-1.
# Iterating the `id` column yields plain Python ints rather than np.int64;
# they hash and compare equal, so lookups by either type still work.
for ID, first, last, team in zip(df['id'], df['firstName'], df['lastName'], df['team']):
    record = p.get(ID)
    if record is None:
        # First appearance of this id: start its record.
        p[ID] = {
            "name": f"{first} {last}",
            "teams": [team]
        }
    else:
        record["teams"].append(team)

# Every id seen above. This is exactly the key set of `p`; the name is kept
# defined in case other cells read it.
current_ids = set(p)

    


In [6]:
# Dump one line per player record in the form "<name>: <team>, <team>, ...".
# The encoding is pinned to UTF-8 so that names containing non-ASCII
# characters are written identically on every platform instead of depending
# on the locale's default encoding (which can raise UnicodeEncodeError).
with open("names.txt", "w", encoding="utf-8") as f:
    for data in p.values():
        # Each team is passed through str() before joining, so a non-string
        # entry (presumably a missing team value - TODO confirm) cannot break
        # the join.
        f.write(f"{data['name']}: {', '.join(map(str, data['teams']))}\n")


In [7]:
# Histogram of how many team entries each player record holds
# (entry count -> number of player ids), plus the ids whose record holds
# more than 7 entries so they can be inspected by hand.
team_lens = {}
names_to_check = []

for ID, data in p.items():
    n_entries = len(data['teams'])
    team_lens[n_entries] = team_lens.get(n_entries, 0) + 1

    if n_entries > 7:
        names_to_check.append(ID)

# Report the histogram in ascending order of entry count.
for n_entries in sorted(team_lens):
    print(f"{n_entries}: {team_lens[n_entries]}")


1: 51884
2: 27417
3: 20984
4: 22571
5: 11936
6: 2955
7: 406
8: 52
9: 6
10: 4
11: 3
13: 3
14: 1


In [8]:
# Ids of the players flagged for manual inspection (records with more than 7
# team entries).
# NOTE(review): this prints the whole list on one line; consider showing
# len(names_to_check) and a short slice instead.
print(names_to_check)

[np.int64(147386), np.int64(160248), np.int64(157607), np.int64(183335), np.int64(189239), np.int64(191520), np.int64(241147), np.int64(174194), np.int64(232454), np.int64(237150), np.int64(480950), np.int64(485026), np.int64(3929118), np.int64(518336), np.int64(545729), np.int64(546537), np.int64(559719), np.int64(559747), np.int64(3125958), np.int64(536118), np.int64(3895827), np.int64(3916620), np.int64(552356), np.int64(3672882), np.int64(3707332), np.int64(3895858), np.int64(3910654), np.int64(3912561), np.int64(3932934), np.int64(3949056), np.int64(4038464), np.int64(3860070), np.int64(3916437), np.int64(4029790), np.int64(4038729), np.int64(4039555), np.int64(4046683), np.int64(4047647), np.int64(4243960), np.int64(3916833), np.int64(3917959), np.int64(3918640), np.int64(3929783), np.int64(4034854), np.int64(4036216), np.int64(4036332), np.int64(4037304), np.int64(4240493), np.int64(4243220), np.int64(4362646), np.int64(4569594), np.int64(4571882), np.int64(4038961), np.int64(40

In [9]:
# Player 147386 (Josh Smith, Akron) has a roster row for every year from 2004
# through 2014, but according to a manual check he actually only played in 2014.
is_josh_smith = df["id"] == 147386
print(df.loc[is_josh_smith])

            id firstName lastName   team  weight  height  jersey  year  \
2614    147386      Josh    Smith  Akron     NaN    73.0    47.0  2004   
5569    147386      Josh    Smith  Akron     NaN    73.0    47.0  2005   
8773    147386      Josh    Smith  Akron     NaN    73.0    47.0  2006   
12449   147386      Josh    Smith  Akron     NaN    73.0    47.0  2007   
16861   147386      Josh    Smith  Akron     NaN    73.0    47.0  2008   
28763   147386      Josh    Smith  Akron     NaN    73.0    47.0  2009   
42599   147386      Josh    Smith  Akron     NaN    73.0    47.0  2010   
57004   147386      Josh    Smith  Akron     NaN    73.0    47.0  2011   
71490   147386      Josh    Smith  Akron     NaN    73.0    47.0  2012   
86273   147386      Josh    Smith  Akron     NaN    73.0    47.0  2013   
101451  147386      Josh    Smith  Akron     NaN    73.0    47.0  2014   

       position    homeCity homeState homeCountry  homeLatitude  \
2614         WR  Cincinnati        OH       

In [10]:
# Second flagged player: all of his roster rows.
# Open question: he registered in 2004, but didn't play until 2009?
second_flagged_id = names_to_check[1]
print(df.loc[df["id"] == second_flagged_id])

           id firstName lastName            team  weight  height  jersey  \
3329   160248      Mike   Naples  Boston College     NaN    76.0    89.0   
6572   160248      Mike   Naples  Boston College     NaN    76.0    89.0   
9740   160248      Mike   Naples  Boston College     NaN    76.0    89.0   
13155  160248      Mike   Naples  Boston College     NaN    76.0    89.0   
17088  160248      Mike   Naples  Boston College     NaN    76.0    89.0   
28787  160248      Mike   Naples  Boston College     NaN    76.0    89.0   
42604  160248      Mike   Naples  Boston College     NaN    76.0    89.0   
57008  160248      Mike   Naples  Boston College     NaN    76.0    89.0   
71493  160248      Mike   Naples  Boston College     NaN    76.0    89.0   
86277  160248      Mike   Naples  Boston College     NaN    76.0    89.0   

       year position    homeCity homeState homeCountry  homeLatitude  \
3329   2004       TE  Branchburg        NJ         USA     40.603041   
6572   2005       T

In [11]:
# Among the flagged ids, keep those whose record names more than one distinct
# team, and report each with its distinct-team count and the team set.
flagged_team_sets = [(ID, set(p[ID]['teams'])) for ID in names_to_check]

different_teams = [
    {
        'id': ID,
        'unique_team_count': len(team_set),
        'unique_teams': team_set
    }
    for ID, team_set in flagged_team_sets
    if len(team_set) > 1
]

print(f"Number of people in names_to_check with more than 1 unique team: {len(different_teams)}")
for entry in different_teams:
    print(f"ID: {entry['id']} | Unique Teams: {entry['unique_team_count']} | Teams: {entry['unique_teams']}")



Number of people in names_to_check with more than 1 unique team: 53
ID: 189239 | Unique Teams: 2 | Teams: {'Illinois', 'Stephen F. Austin'}
ID: 485026 | Unique Teams: 2 | Teams: {'BYU', 'Southern Utah'}
ID: 545729 | Unique Teams: 2 | Teams: {'UTSA', 'Houston'}
ID: 546537 | Unique Teams: 2 | Teams: {'Tennessee', 'Arizona State'}
ID: 559719 | Unique Teams: 2 | Teams: {'Tennessee', 'California'}
ID: 559747 | Unique Teams: 2 | Teams: {'Tennessee', 'Duke'}
ID: 3125958 | Unique Teams: 3 | Teams: {'Massachusetts', 'UL Monroe', 'UTSA'}
ID: 536118 | Unique Teams: 2 | Teams: {'BYU', "Hawai'i"}
ID: 3895827 | Unique Teams: 3 | Teams: {'BYU', 'North Carolina', 'South Carolina'}
ID: 3916620 | Unique Teams: 2 | Teams: {'Eastern Michigan', 'Buffalo'}
ID: 3672882 | Unique Teams: 2 | Teams: {'Michigan', 'Central Michigan'}
ID: 3707332 | Unique Teams: 2 | Teams: {'Oklahoma', 'Arkansas State'}
ID: 3895858 | Unique Teams: 2 | Teams: {'SMU', 'Texas A&M'}
ID: 3910654 | Unique Teams: 3 | Teams: {'Sacramento S