In [12]:
# --- SQL Server Connection Test ---
import pyodbc
import pandas as pd

# STEP 1: Check available ODBC drivers
print("üß© Available ODBC Drivers:")
for driver in pyodbc.drivers():
  print("   ", driver)

# STEP 2: Define connection details
server = "localhost\\SQL2022"              # Change if your instance name differs
database = "QuickStart_DataScience"        # Change to your database
driver = "SQL Server"                      # Use one listed above
#driver = "ODBC Driver 18 for SQL Server"  # Use one listed above
trusted_connection = "yes"                 # or "no" if using SQL auth
encrypt = "no"                             # disable encryption for local

conn_str = ( f"DRIVER={{{driver}}};"
             f"SERVER={server};"
             f"DATABASE={database};"
             f"Trusted_Connection={trusted_connection};"
             f"Encrypt={encrypt};" )

print("\nüîó Connecting with:")
print(conn_str)

# STEP 3: Test connection and query
try:
  with pyodbc.connect(conn_str) as conn:
    query = "SELECT TOP 20 name, database_id FROM sys.databases;"
    df = pd.read_sql(query, conn)
    print("\n‚úÖ Connection successful! Sample query output:")
    display(df)
except Exception as e:
  print("\n‚ùå Connection failed!")
  print(str(e))


üß© Available ODBC Drivers:
    SQL Server
    ODBC Driver 17 for SQL Server
    ODBC Driver 18 for SQL Server
    Microsoft Access Driver (*.mdb, *.accdb)
    Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)
    Microsoft Access Text Driver (*.txt, *.csv)
    Microsoft Access dBASE Driver (*.dbf, *.ndx, *.mdx)

üîó Connecting with:
DRIVER={SQL Server};SERVER=localhost\SQL2022;DATABASE=QuickStart_DataScience;Trusted_Connection=yes;Encrypt=no;

‚úÖ Connection successful! Sample query output:


  df = pd.read_sql(query, conn)


Unnamed: 0,name,database_id
0,master,1
1,tempdb,2
2,model,3
3,msdb,4
4,DWDiagnostics,5
5,DWConfiguration,6
6,DWQueue,7
7,ReportServer,8
8,ReportServerTempDB,9
9,AdventureWorks,10


In [13]:
from sqlalchemy import create_engine
import pandas as pd

engine = create_engine("mssql+pyodbc://localhost\\SQL2022/QuickStart_DataScience?driver=ODBC+Driver+18+for+SQL+Server&trusted_connection=yes&Encrypt=no")


In [14]:
# NOTE: 'engine' defined above ...

select_all_from_allstarfull   = "SELECT * FROM dbo.AllstarFull"
select_top10_from_allstarfull = "SELECT TOP 10 * FROM dbo.AllstarFull"

df = pd.read_sql(select_top10_from_allstarfull, engine)
df.head()


Unnamed: 0,playerID,yearID,gameNum,gameID,teamID,lgID,GP,startingPos
0,kalinal01,1957,0,NLS195707090,DET,AL,1,9.0
1,demaejo01,1957,0,NLS195707090,KC1,AL,0,
2,grimbo01,1957,0,NLS195707090,NYA,AL,1,
3,howarel01,1957,0,NLS195707090,NYA,AL,0,
4,loesbi01,1957,0,NLS195707090,BAL,AL,1,


In [15]:
# NOTE: 'engine' defined above ...

select_all_from_people   = "SELECT * FROM dbo.People"
select_top10_from_people = "SELECT TOP 10 * FROM dbo.People"

df = pd.read_sql(select_top10_from_people, engine)
df.head()

Unnamed: 0,ID,playerID,birthYear,birthMonth,birthDay,birthCity,birthCountry,birthState,deathYear,deathMonth,...,nameLast,nameGiven,weight,height,bats,throws,debut,bbrefID,finalGame,retroID
0,15408,reinhja01,1992,7,19,Charlotte,USA,NC,,,...,Reinheimer,John Patrick,185.0,73.0,R,R,2017-08-01,reinhja01,2018-09-29,reinj001
1,15409,reiniza01,1993,1,28,San Antonio,USA,TX,,,...,Reininger,Zachary Ryan,190.0,75.0,B,R,2017-08-27,reiniza01,2019-09-26,reinz001
2,15410,reipsch01,1856,2,7,New York,USA,NY,1910.0,3.0,...,Reipschlager,Charles W.,160.0,66.0,R,R,1883-05-02,reipsch01,1887-09-06,reipc101
3,15411,reisbo01,1909,1,2,Woodside,USA,NY,1973.0,5.0,...,Reis,Robert Joseph Thomas,175.0,73.0,R,R,1931-09-07,reisbo01,1938-09-21,reisb102
4,15412,reisepe01,1919,3,17,St. Louis,USA,MO,1981.0,10.0,...,Reiser,Harold Patrick,185.0,71.0,L,R,1940-07-23,reisepe01,1952-07-05,reisp101


In [34]:
# QUESTION: 
# ‚Ä¢ Connects to an SQL database file and queries for all players who have played at least 50 games and are still
#   active. Use the ‚ÄúfinalGame‚Äù field from the ‚ÄúPeople‚Äù table to determine if a player is active. Retrieve weight,
#   throws, bats, throws, all birth-related and all name-related columns from the ‚ÄúPeople‚Äù table and retrieve all 
#   columns from the ‚ÄúBatting‚Äù table.
# ‚Ä¢ Converts this data into either an R data frame or a pandas data frame.
# ‚Ä¢ Adds a calculated column with the player‚Äôs age and a calculated column with each player‚Äôs first and last name concatenated.
# ‚Ä¢ Once the calculated columns are added, drops the other columns related to birth date and name.
# ‚Ä¢ Deletes any rows with missing values

command = """
;WITH active_players AS (
  SELECT
     TRIM(P.nameFirst) + ' ' + TRIM(P.nameLast) AS playerName
    ,P.*
  FROM
    dbo.People AS P
  WHERE
    P.finalGame > '2017-12-31'
),
eligible_players AS (
  SELECT
    A.playerID
  FROM
    dbo.Appearances AS A
  GROUP BY
    A.playerID
  HAVING
    SUM(CAST(A.G_all AS INT)) >= 50
)
SELECT
   AP.playerID
  ,AP.playerName
  ,AP.[weight]
  ,AP.throws
  ,DATEDIFF(YEAR, DATEFROMPARTS(AP.birthYear, AP.birthMonth, AP.birthDay), GETDATE())
    - CASE 
        WHEN MONTH(GETDATE()) < AP.birthMonth OR
             MONTH(GETDATE()) = AP.birthMonth AND 
               DAY(GETDATE()) < AP.birthDay 
        THEN 1 ELSE 0 
      END AS 'playerAge'
  ,AP.birthCountry
  ,AP.birthState
  ,AP.birthCity
  ,B.*
FROM active_players AS AP
  INNER JOIN eligible_players AS EP
    INNER JOIN dbo.Batting AS B
      ON EP.playerID = B.playerID
    ON AP.playerID = EP.playerID
ORDER BY 
   AP.playerID  ASC
  ,B.yearID     DESC
"""

df = pd.read_sql(command, engine)
df


Unnamed: 0,playerID,playerName,weight,throws,playerAge,birthCountry,birthState,birthCity,playerID.1,yearID,...,SB,CS,BB,SO,IBB,HBP,SH,SF,GIDP,G_old
0,abadfe01,Fernando Abad,235,L,39,D.R.,La Romana,La Romana,abadfe01,2023,...,0,0,0,0,0,0,0,0,0,
1,abadfe01,Fernando Abad,235,L,39,D.R.,La Romana,La Romana,abadfe01,2021,...,0,0,0,0,0,0,0,0,0,
2,abadfe01,Fernando Abad,235,L,39,D.R.,La Romana,La Romana,abadfe01,2019,...,0,0,0,0,0,0,0,0,0,
3,abadfe01,Fernando Abad,235,L,39,D.R.,La Romana,La Romana,abadfe01,2017,...,0,0,0,0,0,0,0,0,0,
4,abadfe01,Fernando Abad,235,L,39,D.R.,La Romana,La Romana,abadfe01,2016,...,0,0,0,1,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13573,zuninmi01,Mike Zunino,235,R,34,USA,FL,Cape Coral,zuninmi01,2017,...,1,0,39,160,0,8,0,1,8,
13574,zuninmi01,Mike Zunino,235,R,34,USA,FL,Cape Coral,zuninmi01,2016,...,0,0,21,65,0,6,0,1,0,
13575,zuninmi01,Mike Zunino,235,R,34,USA,FL,Cape Coral,zuninmi01,2015,...,0,1,21,132,0,5,8,2,6,
13576,zuninmi01,Mike Zunino,235,R,34,USA,FL,Cape Coral,zuninmi01,2014,...,0,3,17,158,1,17,0,4,12,
