Merge pull request #42 from sportsdataverse/armstjc/add-retrosheet

Armstjc/add retrosheet Release Candidate 2
sportsdataverse · Sep 5, 2022 · 69c996f · 69c996f · vercel · Sep 5, 2022
2 parents c5e1271 + 3f0a795
commit 69c996f
Show file tree

Hide file tree

Showing 11 changed files with 1,241 additions and 258 deletions.
diff --git a/docs/docs/mlb/index.md b/docs/docs/mlb/index.md
diff --git a/docs/sidebars.js b/docs/sidebars.js
@@ -11,60 +11,48 @@
 
 module.exports = {
   // By default, Docusaurus generates a sidebar from the docs folder structure
-  tutorialSidebar: [{type: 'autogenerated', dirName: '.'}],
+  tutorialSidebar: [{ type: "autogenerated", dirName: "." }],
   docs: [
-    { type: 'doc',
-      id: 'intro',
-      label: 'Getting Started'
+    { type: "doc", id: "intro", label: "Getting Started" },
+    {
+      type: "category",
+      label: "CFB",
+      items: ["cfb/index"],
     },
     {
-      type: 'category',
-      label: 'CFB',
-      items: [
-        'cfb/index',
-      ],
+      type: "category",
+      label: "MBB",
+      items: ["mbb/index"],
     },
     {
-      type: 'category',
-      label: 'NFL',
-      items: [
-        'nfl/index',
-      ],
+      type: "category",
+      label: "MLB",
+      items: ["mlb/index"],
     },
     {
-      type: 'category',
-      label: 'MBB',
-      items: [
-        'mbb/index',
-      ],
+      type: "category",
+      label: "NBA",
+      items: ["nba/index"],
     },
     {
-      type: 'category',
-      label: 'NBA',
-      items: [
-        'nba/index',
-      ],
+      type: "category",
+      label: "NFL",
+      items: ["nfl/index"],
     },
     {
-      type: 'category',
-      label: 'WBB',
-      items: [
-        'wbb/index',
-      ],
+      type: "category",
+      label: "NHL",
+      items: ["nhl/index"],
     },
     {
-      type: 'category',
-      label: 'WNBA',
-      items: [
-        'wnba/index',
-      ],
+      type: "category",
+      label: "WBB",
+      items: ["wbb/index"],
     },
     {
-      type: 'category',
-      label: 'NHL',
-      items: [
-        'nhl/index',
-      ],
+      type: "category",
+      label: "WNBA",
+      items: ["wnba/index"],
     },
   ],
   // But you can create a sidebar manually
@@ -77,4 +65,4 @@ module.exports = {
     },
   ],
    */
-};
+};
diff --git a/sportsdataverse/mlb/__init__.py b/sportsdataverse/mlb/__init__.py
@@ -1,6 +1,8 @@
 from sportsdataverse.mlb.mlb_loaders import *
+from sportsdataverse.mlb.retrosheet import *
+from sportsdataverse.mlb.retrosplits import *
 from sportsdataverse.mlb.mlbam_games import *
 from sportsdataverse.mlb.mlbam_players import *
 from sportsdataverse.mlb.mlbam_reports import *
 from sportsdataverse.mlb.mlbam_stats import *
-from sportsdataverse.mlb.mlbam_teams import *
+from sportsdataverse.mlb.mlbam_teams import *
diff --git a/sportsdataverse/mlb/mlb_loaders.py b/sportsdataverse/mlb/mlb_loaders.py
@@ -7,15 +7,16 @@
 
 import os
 def mlbam_copyright_info(saveFile=False,returnFile=False):
-	"""Displays the copyright info for the MLBAM API.
+	"""
+	Displays the copyright info for the MLBAM API.
 
 	Args:
 		saveFile (boolean) = False
 		If saveFile is set to True, the copyright file generated is saved.
 
 		returnFile (boolean) = False
 		If returnFile is set to True, the copyright file is returned.
-
+	
 	"""
 	url = "http://gdx.mlb.com/components/copyright.txt"
 	resp = download(url=url)

diff --git a/sportsdataverse/mlb/mlbam_games.py b/sportsdataverse/mlb/mlbam_games.py
@@ -6,13 +6,12 @@
 import json
 from sportsdataverse.dl_utils import download
 from datetime import datetime
-import os
 
 
-def mlbam_schedule(season:int,gameType="R"):
-	"""Retrieves the start and end date for games for every league, and the MLB,
-	for a given season.
 
+def mlbam_schedule(season:int,gameType="R"):
+	"""
+	Retrieves the start and end date for games for every league, and the MLB, for a given season.
 	This function does not get individual games.
 
 	Args:
@@ -31,9 +30,10 @@ def mlbam_schedule(season:int,gameType="R"):
 			'F' - First Round (Wild Card)
 			'L' - League Championship
 			'W' - World Series
+
+	Returns: 
+		A pandas dataframe containing MLB scheduled games.
 	"""
-	#pullCopyrightInfo()
-	#p_df = pd.DataFrame()
 	main_df = pd.DataFrame()
 
 	searchURL = "http://lookup-service-prod.mlb.com/json/named.org_game_type_date_info.bam?current_sw='Y'&sport_code='mlb'&"
@@ -59,9 +59,7 @@ def mlbam_schedule(season:int,gameType="R"):
 
 	resp = download(searchURL)
 
-	#print(searchURL)
 	resp_str = str(resp, 'UTF-8')
-	#print(resp_str)
 
 	resp_json = json.loads(resp_str)
 	try:
@@ -70,11 +68,7 @@ def mlbam_schedule(season:int,gameType="R"):
 		result_count = 0
 
 	if result_count > 0:
-		#print(resp_json['player_teams']['queryResults']['row'])
-		#print(f'{result_count} statlines found,\nParsing results into a dataframe.')
-		#players = resp_json['search_player_all']['queryResults']['row']
 		main_df = json_normalize(resp_json['org_game_type_date_info']['queryResults']['row'])
-		#print('Done')
 	else:
 		print(f'No results found for the provided playerID. \nTry a diffrient search for better results.')
 	return main_df
diff --git a/sportsdataverse/mlb/mlbam_players.py b/sportsdataverse/mlb/mlbam_players.py
@@ -5,12 +5,11 @@
 from pandas import json_normalize
 import json
 from sportsdataverse.dl_utils import download
-from tqdm import tqdm
 
-import os
 
 def mlbam_search_mlb_players(search:str,isActive=""):
-	"""Searches for an MLB player in the MLBAM API.
+	"""
+	Searches for an MLB player in the MLBAM API.
 
 	Args:
 		search (string):
@@ -24,15 +23,17 @@ def mlbam_search_mlb_players(search:str,isActive=""):
 			If you want active players, set isActive to "Y" or "Yes".
 
 			If you want inactive players, set isActive to "N" or "No".
+
+	Returns:
+		A pandas dataframe containing MLBAM players whose name(s) matches the input string.
+
 	"""
-	#pullCopyrightInfo()
 	searchURL = "http://lookup-service-prod.mlb.com/json/named.search_player_all.bam?sport_code='mlb'"
 
 	p_df = pd.DataFrame()
 	main_df = pd.DataFrame()
 
 	if len(isActive) == 0:
-		#print('Searching for all MLB players.')
 		print('')
 	elif isActive.lower() == "y" or isActive.lower() == "yes":
 		searchURL = searchURL + "&active_sw='Y'"
@@ -46,11 +47,9 @@ def mlbam_search_mlb_players(search:str,isActive=""):
 
 		searchURL= searchURL + f"&name_part='{search}%25'"
 
-		#searchURL = urllib.parse.quote_plus(str(searchURL))
 		resp = download(searchURL)
 
 		resp_str = str(resp, 'UTF-8')
-		#print(resp_str)
 
 		resp_json = json.loads(resp_str)
 		result_count = int(resp_json['search_player_all']['queryResults']['totalSize'])
@@ -75,9 +74,10 @@ def mlbam_player_info(playerID:int):
 	Args:
 		playerID (int):
 			Required parameter. If no playerID is provided, the function will not work.
+	
+	Returns:
+		A pandas dataframe containing player information for the specified MLBAM player ID.
 	"""
-	#pullCopyrightInfo()
-	#p_df = pd.DataFrame()
 	main_df = pd.DataFrame()
 
 	searchURL = "http://lookup-service-prod.mlb.com/json/named.player_info.bam?sport_code='mlb'&player_id="
@@ -88,12 +88,9 @@ def mlbam_player_info(playerID:int):
 	else:
 		searchURL= searchURL + f"\'{playerID}\'%27"
 
-		#searchURL = urllib.parse.quote_plus(str(searchURL))
 		resp = download(searchURL)
 
-		#print(searchURL)
 		resp_str = str(resp, 'UTF-8')
-		#print(resp_str)
 
 		resp_json = json.loads(resp_str)
 		try:
@@ -102,10 +99,6 @@ def mlbam_player_info(playerID:int):
 			result_count = 0
 
 		if result_count > 0:
-			#print(resp_json['player_info']['queryResults']['row'])
-
-			#print(f'{result_count} players found,\nParsing results into a dataframe.')
-			#players = resp_json['search_player_all']['queryResults']['row']
 			main_df = json_normalize(resp_json['player_info']['queryResults']['row'])
 			print('Done')
 		else:
@@ -114,8 +107,8 @@ def mlbam_player_info(playerID:int):
 		return main_df
 
 def mlbam_player_teams(playerID:int,season:int):
-	"""Retrieves the info regarding which teams that player played for in a given
-	season, or in the player's career
+	"""
+	Retrieves the info regarding which teams that player played for in a given season, or in the player's career.
 
 	Args:
 		playerID (int):
@@ -124,9 +117,10 @@ def mlbam_player_teams(playerID:int,season:int):
 		season (int):
 			Required parameter. If provided, the search will only look for teams
 			that player played for in that season.
+
+	Returns:
+		A pandas dataframe containing teams a player played for in that season.
 	"""
-	#pullCopyrightInfo()
-	#p_df = pd.DataFrame()
 	main_df = pd.DataFrame()
 
 	searchURL = "http://lookup-service-prod.mlb.com/json/named.player_teams.bam?"
@@ -144,12 +138,9 @@ def mlbam_player_teams(playerID:int,season:int):
 	else:
 		searchURL= searchURL + f"player_id=\'{playerID}\'"
 
-		#searchURL = urllib.parse.quote_plus(str(searchURL))
 		resp = download(searchURL)
 
-		#print(searchURL)
 		resp_str = str(resp, 'UTF-8')
-		#print(resp_str)
 
 		resp_json = json.loads(resp_str)
 		try:
@@ -158,10 +149,8 @@ def mlbam_player_teams(playerID:int,season:int):
 			result_count = 0
 
 		if result_count > 0:
-			#print(resp_json['player_teams']['queryResults']['row'])
 
 			print(f'{result_count} players found,\nParsing results into a dataframe.')
-			#players = resp_json['search_player_all']['queryResults']['row']
 			main_df = json_normalize(resp_json['player_teams']['queryResults']['row'])
 			print('Done')
 		else: