In [14]:
import os
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
from sqlalchemy import create_engine

def flatten_df(df):
    """Collapse multi-level column labels of ``df`` into single strings.

    The frame is modified in place and the same object is returned.

    Rules applied to each column label:
      * a tuple whose first level contains "Unnamed" is replaced by its last
        level, stripped, with inner spaces turned into underscores;
      * any other tuple has every level stripped, inner spaces removed, and
        the levels joined with "_";
      * a label that is not a tuple (the frame is already single-level) is
        left untouched.

    Flattening is best-effort: if an ordinary error occurs it is printed and
    the frame is returned with whatever columns it had at that point.
    """
    try:
        flat_labels = []
        for label in df.columns:
            if not isinstance(label, tuple):
                # Already flat. Iterating a plain string here would join its
                # individual characters with "_", so keep it as is.
                flat_labels.append(label)
                continue

            # str() lets non-string header levels (e.g. numbers) through
            # instead of aborting the whole flatten.
            levels = [str(level).strip() for level in label]
            if "Unnamed" in levels[0]:
                flat_labels.append(levels[-1].replace(" ", "_"))
            else:
                flat_labels.append("_".join(level.replace(" ", "") for level in levels))

        df.columns = flat_labels
    except Exception as e:
        # Error in flattening the dataframe
        print("Error occurred unable to flatten the dataframe.")
        print(f"Error message: {str(e)}")

    # Returned from here rather than from a `finally` clause: a return inside
    # `finally` would also discard KeyboardInterrupt / SystemExit.
    return df

def transform_combine(raw_squad_df, raw_opponent_df):
    """Stack the squad and opponent versions of one stats table into one frame.

    Both inputs are copied (the callers' frames are not modified), flattened
    with ``flatten_df`` and tagged with a ``Value`` column holding "Squad" or
    "Opponent". The first whitespace-separated token of every opponent
    ``Squad`` label is dropped before stacking.

    Parameters:
        raw_squad_df: frame with the squad-side statistics.
        raw_opponent_df: frame with the opponent-side statistics; must expose
            a ``Squad`` column after flattening.

    Returns:
        A new DataFrame with the opponent rows first, then the squad rows,
        and a fresh 0..n-1 index.

    Raises:
        Whatever ``pd.concat`` raises; the error is printed first and then
        re-raised.
    """
    # Squad dataframe: flatten and create the Value column
    squad_df = flatten_df(raw_squad_df.copy())
    squad_df["Value"] = "Squad"

    # Opponent dataframe: flatten, remove the leading token, create the Value column.
    # Assumes every opponent label starts with one prefix token such as "vs" -
    # TODO confirm against the scraped table. The .str accessor leaves missing
    # labels as NaN instead of failing on them.
    opponent_df = flatten_df(raw_opponent_df.copy())
    opponent_df["Squad"] = opponent_df["Squad"].str.split().str[1:].str.join(" ")
    opponent_df["Value"] = "Opponent"

    try:
        # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x
        stats_df = pd.concat([opponent_df, squad_df], ignore_index=True)
    except Exception as e:
        # Error in appending the dataframes
        print("Error occurred unable to append the dataframes.")
        print(f"Error message: {str(e)}")
        # There is no combined frame to return; surface the real error rather
        # than an UnboundLocalError on `stats_df`.
        raise

    return stats_df

def pushToDB(table_name, df, conn, if_exists = "replace", index = False):
    """Write ``df`` to ``table_name`` inside one transaction on ``conn``.

    Parameters:
        table_name: name of the destination table.
        df: object exposing ``to_sql`` (a pandas DataFrame in this notebook).
        conn: connection exposing ``begin()``; also passed to ``to_sql``.
        if_exists: forwarded to ``to_sql``.
        index: forwarded to ``to_sql``.

    Returns:
        None. Failures inside the transaction are rolled back and reported on
        stdout; they are not re-raised. ``conn.begin()`` runs outside the
        guarded section, so an error there does propagate.
    """
    # Open the transaction that wraps the upload
    txn = conn.begin()

    try:
        # Upload, then make it permanent
        df.to_sql(name=table_name, con=conn, if_exists=if_exists, index=index)
        txn.commit()
        print(f"{table_name} has been successfully committed.")
    except Exception as err:
        # Undo the partial upload before reporting
        txn.rollback()
        print(
            f"Error occurred in uploading {table_name}. Transaction has been rolled back.",
            f"Error message: {err!s}",
            sep="\n",
        )

# fbref.com pages scraped for the Premier League
PL_STATS_URL = r"https://fbref.com/en/comps/9/Premier-League-Stats"
PL_SCORES_FIXTURES_URL = r"https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures"

print("Data Extract Phase Started....")

# Only the first table of the fixtures page is kept.
raw_scores_and_fixtures = pd.read_html(PL_SCORES_FIXTURES_URL)[0]

# Every table of the stats page, in page order. The positions below are tied
# to that order - presumably (squad, opponent) pairs per category; re-check
# them if the page layout changes.
all_tables = pd.read_html(PL_STATS_URL)

# League table: overall and home/away splits
raw_regular_season_overall, raw_regular_season_home_away = all_tables[0], all_tables[1]

# Per-category statistics
raw_squad_standard_stats_squad, raw_squad_standard_stats_opponent = all_tables[2], all_tables[3]
raw_squad_goalkeeping_squad, raw_squad_goalkeeping_opponent = all_tables[4], all_tables[5]
raw_squad_advanced_goalkeeping_squad, raw_squad_advanced_goalkeeping_opponent = all_tables[6], all_tables[7]
raw_squad_shooting_squad, raw_squad_shooting_opponent = all_tables[8], all_tables[9]
raw_squad_passing_squad, raw_squad_passing_opponent = all_tables[10], all_tables[11]
raw_squad_pass_types_squad, raw_squad_pass_types_opponent = all_tables[12], all_tables[13]
raw_squad_goal_shot_creation_squad, raw_squad_goal_shot_creation_opponent = all_tables[14], all_tables[15]
raw_squad_defensive_actions_squad, raw_squad_defensive_actions_opponent = all_tables[16], all_tables[17]
raw_squad_possession_squad, raw_squad_possession_opponent = all_tables[18], all_tables[19]
raw_squad_playing_time_squad, raw_squad_playing_time_opponent = all_tables[20], all_tables[21]
raw_squad_miscellaneous_stats_squad, raw_squad_miscellaneous_stats_opponent = all_tables[22], all_tables[23]

print("Data Extract Phase Ended....")

print("Data Transformation Phase Started....")

print("Regular Season Transformations....")
# The home/away table is flattened with flatten_df.
raw_regular_season_home_away = flatten_df(raw_regular_season_home_away.copy())
# Prefix every overall column except the two merge keys so the names cannot
# collide with the home/away columns.
raw_regular_season_overall.columns = [
    i if i in ["Rk", "Squad"] else "Overall_" + i.strip().replace(" ", "")
    for i in raw_regular_season_overall.columns
]
# validate="one_to_one" makes the merge fail if a (Rk, Squad) key repeats on either side.
regular_season = raw_regular_season_overall.merge(
    right=raw_regular_season_home_away,
    how='inner',
    on=["Rk", "Squad"],
    validate="one_to_one",
)

# Each category below stacks its squad and opponent tables via transform_combine.
print("Standard Stats Transformations....")
standard_stats = transform_combine(
    raw_squad_df=raw_squad_standard_stats_squad.copy(),
    raw_opponent_df=raw_squad_standard_stats_opponent.copy(),
)

print("Goalkeeping Stats Transformations....")
goalkeeping_stats = transform_combine(
    raw_squad_df=raw_squad_goalkeeping_squad.copy(),
    raw_opponent_df=raw_squad_goalkeeping_opponent.copy(),
)

print("Advanced Goalkeeping Stats Transformations....")
advanced_goalkeeping_stats = transform_combine(
    raw_squad_df=raw_squad_advanced_goalkeeping_squad.copy(),
    raw_opponent_df=raw_squad_advanced_goalkeeping_opponent.copy(),
)

print("Shooting Stats Transformations....")
shooting_stats = transform_combine(
    raw_squad_df=raw_squad_shooting_squad.copy(),
    raw_opponent_df=raw_squad_shooting_opponent.copy(),
)

print("Passing Stats Transformations....")
passing_stats = transform_combine(
    raw_squad_df=raw_squad_passing_squad.copy(),
    raw_opponent_df=raw_squad_passing_opponent.copy(),
)

print("Passing Types Stats Transformations....")
passing_types_stats = transform_combine(
    raw_squad_df=raw_squad_pass_types_squad.copy(),
    raw_opponent_df=raw_squad_pass_types_opponent.copy(),
)

print("Goal Shot Creation Stats Transformations....")
goal_shot_creation_stats = transform_combine(
    raw_squad_df=raw_squad_goal_shot_creation_squad.copy(),
    raw_opponent_df=raw_squad_goal_shot_creation_opponent.copy(),
)

print("Defensive Action Stats Transformations....")
defensive_action_stats = transform_combine(
    raw_squad_df=raw_squad_defensive_actions_squad.copy(),
    raw_opponent_df=raw_squad_defensive_actions_opponent.copy(),
)

print("Posession Stats Transformations....")
possession_stats = transform_combine(
    raw_squad_df=raw_squad_possession_squad.copy(),
    raw_opponent_df=raw_squad_possession_opponent.copy(),
)

print("Playing Time Stats Transformations....")
playing_time_stats = transform_combine(
    raw_squad_df=raw_squad_playing_time_squad.copy(),
    raw_opponent_df=raw_squad_playing_time_opponent.copy(),
)

print("Miscellaneous Stats Transformations....")
miscellaneous_stats = transform_combine(
    raw_squad_df=raw_squad_miscellaneous_stats_squad.copy(),
    raw_opponent_df=raw_squad_miscellaneous_stats_opponent.copy(),
)

print("Data Transformation Phase Ended....")

# Connection settings. Each value can be overridden with an environment
# variable so real credentials never need to be saved in the notebook; the
# fallbacks are the original development values.
database = os.environ.get("FOOTBALL_DB_NAME", "football-db")
user = os.environ.get("FOOTBALL_DB_USER", "user")
password = os.environ.get("FOOTBALL_DB_PASSWORD", "password")
host = os.environ.get("FOOTBALL_DB_HOST", "192.168.59.101")
port = os.environ.get("FOOTBALL_DB_PORT", "30432")

try:
    print("Establishing Connection with DB....")
    # User and password are percent-encoded because characters such as "@",
    # ":" or "/" would otherwise be read as URL delimiters.
    db = create_engine(
        f"postgresql://{quote_plus(user)}:{quote_plus(password)}@{host}:{port}/{database}"
    )
    conn = db.connect()
    print("Successfully Established Connection with DB....")

except Exception:
    print("Unable to Establish Connection with DB....")
    # Nothing below can run without a connection: re-raise the same exception
    # with its original traceback.
    raise


print("Data Loading Phase Started....")

# (table name, frame) pairs, uploaded in this order. Each table is
# created or replaced by pushToDB.
tables_to_load = [
    ("regular_season", regular_season),
    ("standard_stats", standard_stats),
    ("goalkeeping_stats", goalkeeping_stats),
    ("advanced_goalkeeping_stats", advanced_goalkeeping_stats),
    ("shooting_stats", shooting_stats),
    ("passing_stats", passing_stats),
    ("passing_types_stats", passing_types_stats),
    ("goal_shot_creation_stats", goal_shot_creation_stats),
    ("defensive_action_stats", defensive_action_stats),
    ("possession_stats", possession_stats),
    ("playing_time_stats", playing_time_stats),
    ("miscellaneous_stats", miscellaneous_stats),
]

try:
    for table_name, table_df in tables_to_load:
        pushToDB(table_name=table_name, df=table_df, conn=conn)
finally:
    # Close the connection even if a push raises: pushToDB reports upload
    # errors itself, but a failure in conn.begin() or in the rollback still
    # propagates and would otherwise leave the connection open.
    conn.close()

print("Data Loading Phase Ended....")

Data Extract Phase Started....
Data Extract Phase Ended....
Data Transformation Phase Started....
Regular Season Transformations....
Standard Stats Transformations....
Goalkeeping Stats Transformations....
Advanced Goalkeeping Stats Transformations....
Shooting Stats Transformations....
Passing Stats Transformations....
Passing Types Stats Transformations....
Goal Shot Creation Stats Transformations....
Defensive Action Stats Transformations....
Posession Stats Transformations....
Playing Time Stats Transformations....
Miscellaneous Stats Transformations....
Data Transformation Phase Ended....
Establishing Connection with DB....
Successfully Established Connection with DB....
Data Loading Phase Started....
regular_season has been successfully committed.
standard_stats has been successfully committed.
goalkeeping_stats has been successfully committed.
advanced_goalkeeping_stats has been successfully committed.
shooting_stats has been successfully committed.
passing_stats has been succes

In [10]:
# Inspect which kind of object `conn` is (requires the ETL cell to have run in this kernel).
type(conn)

sqlalchemy.engine.base.Connection

In [16]:
# List the tables in the `public` schema.
# Reuses the connection settings defined in the ETL cell (user, password,
# host, port, database) instead of repeating the literals, so that cell must
# have been run first.
connection = psycopg2.connect(user=user,
                              password=password,
                              host=host,
                              port=port,
                              database=database)

try:
    # The cursor context manager closes the cursor; the `finally` closes the
    # connection even when the query fails.
    with connection.cursor() as cursor:
        postgreSQL_select_Query = """SELECT table_name FROM information_schema.tables
       WHERE table_schema = 'public'"""

        cursor.execute(postgreSQL_select_Query)
        # One tuple per table; the name `mobile_records` is kept so anything
        # that reads this variable keeps working.
        mobile_records = cursor.fetchall()
finally:
    connection.close()

print("Print each row and it's columns values")
mobile_records

OperationalError: connection to server at "192.168.59.101", port 30432 failed: Connection refused (0x0000274D/10061)
	Is the server running on that host and accepting TCP/IP connections?


In [11]:
# DESTRUCTIVE: drops every table written by the load phase.
# Reuses the connection settings defined in the ETL cell (user, password,
# host, port, database), so that cell must have been run first.
connection = psycopg2.connect(user=user,
                              password=password,
                              host=host,
                              port=port,
                              database=database)

try:
    # With autocommit on, the DROP takes effect immediately, so no explicit
    # commit is needed afterwards.
    connection.autocommit = True

    with connection.cursor() as cursor:
        # IF EXISTS keeps the cell re-runnable when some or all of the tables
        # are already gone.
        postgreSQL_select_Query = """DROP TABLE IF EXISTS regular_season, standard_stats, goalkeeping_stats,
advanced_goalkeeping_stats, shooting_stats, passing_stats, passing_types_stats, goal_shot_creation_stats, defensive_action_stats, possession_stats,
playing_time_stats, miscellaneous_stats;"""

        cursor.execute(postgreSQL_select_Query)
finally:
    connection.close()

In [17]:
# Show the pandas version installed in this kernel.
pd.__version__

'1.2.4'

In [22]:
import lxml as qal


In [23]:
# Show the lxml version installed in this kernel (`qal` is the alias lxml was imported under).
qal.__version__

'4.6.3'