In [1]:
%%writefile home_page.py

import sys
import streamlit as st
from time import sleep
sys.path.append('.')   
from custom_transformers import LogTransformer, Handle_outliers_lb_ub, FrequencyEncoder

# --- Page configuration: wide layout, sidebar collapsed on first load ---
st.set_page_config(
    page_title="FIFA 21 Analysis",
    page_icon="⚽",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# --- Centered page heading (inline style overrides the theme's heading color) ---
PAGE_HEADING_HTML = """<h1 style="color:white;text-align:center;">  Player Market value of FIFA 21 prediction 📈</h1>"""
st.markdown(PAGE_HEADING_HTML, unsafe_allow_html=True)

# --- Dark theme: background, heading/paragraph colors and button styling ---
DARK_THEME_CSS = """<style>/* Background & text color */
        body { background-color: #0E1117;color: white;}
        /* Streamlit container tweaks */.stApp 
        {background-color: #0E1117;}
        h1, h2, h3, h4, h5, h6 {color: #FFA500;  
        /* orange accent */font-family: 'Segoe UI', sans-serif;}
        p {color: #E0E0E0;}
        /* Button styling */div.stButton >
        button {background-color: #5353ec;
        color: white; border-radius: 12px;font-weight: bold;}
        div.stButton > button:hover {background-color: #FFA500;
        color: #000;}</style>"""
st.markdown(DARK_THEME_CSS, unsafe_allow_html=True)

# --- Hero image, placed in the wider middle column of a 1:2:1 layout ---
image_path = "images/7.jpg"
_, hero_column, _ = st.columns([1, 2, 1])
hero_column.image(image_path, use_container_width=True)

# --- Tagline followed by a horizontal rule ---
TAGLINE_HTML = """<p style='text-align:center; color:red; font-size:18px;'>Discover insights, analyze data, 
and predict FIFA 21 Player Market Values Using CatBoost Models.</p>"""
st.markdown(TAGLINE_HTML, unsafe_allow_html=True)
st.markdown("<hr>", unsafe_allow_html=True)

# One column per destination page.
nav_columns = st.columns(4, gap="large")

# Remember which page is showing; the landing page is the default.
if "current_page" not in st.session_state:
    st.session_state["current_page"] = "home"


def go_to(page):
    """Record a navigation to ``page`` in the session state.

    Raises the ``fade`` flag, pauses for 0.3 seconds and then stores ``page``
    under ``current_page``. It does not switch pages itself; callers follow it
    with ``st.switch_page``.
    """
    state = st.session_state
    state["fade"] = True
    sleep(0.3)
    state["current_page"] = page


# (button label, target script) pairs, rendered left to right.
NAV_TARGETS = (
    ("ℹ️ About", "pages/about.py"),
    ("📊 EDA", "pages/eda1.py"),
    ("🤖 Modeling", "pages/modeling.py"),
    ("📈 Presentation", "pages/presntation.py"),
)

for nav_column, (label, target) in zip(nav_columns, NAV_TARGETS):
    with nav_column:
        if st.button(label):
            go_to(target)
            st.switch_page(target)

# Play the fade-in animation once, then clear the flag so it does not repeat.
if st.session_state.get("fade"):
    st.markdown( """<style>.stApp {animation: fadeEffect 0.7s;}@keyframes fadeEffect {from {opacity: 0;}
            to {opacity: 1;}}</style>""", unsafe_allow_html=True)
    st.session_state["fade"] = False


Overwriting home_page.py


In [2]:
# Launch the Streamlit app from the notebook; the cell keeps running until it is interrupted.
# NOTE(review): the page scripts below are written next to the notebook, while the app
# navigates to `pages/...` paths — confirm they are moved into `pages/` before running.
! streamlit run home_page.py

^C


In [None]:
%%writefile custom_transformers.py

from sklearn.base import BaseEstimator, TransformerMixin
import streamlit as st
import pandas as pd
import numpy as np


class LogTransformer(BaseEstimator, TransformerMixin):
    """Apply ``log1p`` to every value after clamping negatives to zero.

    Accepts a DataFrame, a Series (treated as one column) or any array-like;
    1-D array-likes are treated as a single column. ``transform`` returns a
    NumPy array.
    """

    @staticmethod
    def _as_2d(X):
        """Return ``X`` in a two-dimensional form without copying DataFrames."""
        if isinstance(X, pd.Series):
            return X.to_frame()
        if isinstance(X, pd.DataFrame):
            return X
        values = np.asarray(X)
        # A flat vector is interpreted as one feature with many samples.
        return values.reshape(-1, 1) if values.ndim == 1 else values

    def fit(self, X, y=None):
        """Record the number of input columns; nothing else is learned."""
        self.n_features_in_ = self._as_2d(X).shape[1]
        return self

    def transform(self, X, y=None):
        """Return ``log1p(max(X, 0))`` as a NumPy array.

        Raises:
            ValueError: if ``X`` has a different number of columns than the
                data passed to ``fit``.
        """
        X = self._as_2d(X)
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f"X has {X.shape[1]} features, but LogTransformer was fitted "
                f"with {self.n_features_in_}."
            )
        X = np.where(X < 0, 0, X)  # remove negatives before log
        return np.log1p(X)
log_transformer = LogTransformer()

class Handle_outliers_lb_ub(BaseEstimator, TransformerMixin):
    """Clip each column to its 1.5 * IQR fences learned during ``fit``."""

    def fit(self, X, y=None):
        """Compute ``(lower, upper)`` fences per column and store them in ``bounds_``."""
        frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)

        self.bounds_ = {}
        for name in frame.columns:
            first_quartile = frame[name].quantile(0.25)
            third_quartile = frame[name].quantile(0.75)
            spread = third_quartile - first_quartile
            self.bounds_[name] = (
                first_quartile - 1.5 * spread,
                third_quartile + 1.5 * spread,
            )
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` (as a DataFrame) with every fitted column clipped."""
        frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
        clipped = frame.copy()

        for name, (low, high) in self.bounds_.items():
            clipped[name] = np.clip(clipped[name], low, high)
        return clipped
h_lb_ub = Handle_outliers_lb_ub()

class FrequencyEncoder(BaseEstimator, TransformerMixin):
    """Replace categorical columns with the relative frequency of each category.

    Parameters:
        columns: column label, iterable of labels, or ``None``. With ``None``
            every column seen during ``fit`` is encoded.

    Each encoded column ``c`` is dropped and replaced by ``c_freq``; categories
    not seen during ``fit`` are encoded as 0.
    """

    def __init__(self, columns=None):
        self.columns = columns
        # Attribute name kept as-is so objects pickled with it remain usable.
        self.freq_maps = {}

    def _selected_columns(self, fallback):
        """Resolve ``self.columns`` to a list, using ``fallback`` when it is None."""
        if self.columns is None:
            return list(fallback)
        if isinstance(self.columns, str):
            # A bare string would otherwise be iterated character by character.
            return [self.columns]
        return list(self.columns)

    def fit(self, X, y=None):
        """Learn the normalized value counts of every selected column."""
        # Rebuilt from scratch so a re-fit never keeps maps of previously used columns.
        self.freq_maps = {
            col: X[col].value_counts(normalize=True)
            for col in self._selected_columns(X.columns)
        }
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with each selected column replaced by ``<col>_freq``."""
        X = X.copy()
        # Apply frequency encoding to each column
        for col in self._selected_columns(self.freq_maps):
            X[f"{col}_freq"] = X[col].map(self.freq_maps[col]).fillna(0)
            X = X.drop(columns=col)
        return X
frequency_encoder = FrequencyEncoder()

Writing custom_transformers.py


In [None]:
%%writefile about.py

import streamlit as st
from time import sleep
import pandas as pd

# Dataset shown and summarised on this page.
df = pd.read_csv('pages/eda_df.csv')

# ========== PAGE TITLE ==========
# The literal opens with exactly three quotes; a fourth one becomes part of the
# string and is rendered as a stray '"' in front of the heading.
st.markdown("""<h4 style="color:white;text-align:center;">⚽ Player Market Value Dataset Overview</h4>""", unsafe_allow_html=True)
# Overview blurb: a scoped style rule for `.overview-container` followed by three
# descriptive paragraphs about the dataset, rendered as raw HTML.
st.markdown(""" <style> .overview-container { color: orange; font-family: 'Segoe UI', sans-serif;
                text-align: justify;} </style> 
    <div class="overview-container">
        <p>
            The <b>FIFA 21 dataset</b> from <i>Kaggle</i> brings the world of football into data form —
            featuring over <b>18,000 players</b> from clubs and national teams worldwide.
            Each record captures who a player is, how they perform, and what makes them valuable.
        </p>
        <p>
            It combines <b>numerical stats</b> like <i>age, pace, shooting, passing,</i> and
            <i>overall rating</i> with <b>categorical details</b> such as
            <i>nationality, club, position,</i> and <i>preferred foot</i>.
            The star of the show is the <b>market_value</b> — the estimated worth of each player in euros.
        </p>
        <p>
            This dataset goes beyond raw numbers — it tells the story of how
            <b>talent, skill, and reputation</b> translate into market value.
            It’s a dream playground for analysts and data scientists aiming to explore
            what truly makes a player worth millions.
        </p>
    </div> """, unsafe_allow_html=True)

st.markdown("<hr>", unsafe_allow_html=True)

# ========== BASIC INFO ==========
# Three headline figures: row count, column count and distinct nationalities.
players_col, columns_col, nationalities_col = st.columns(3)
players_col.metric("🧍 Total Players", format(len(df), ","))
columns_col.metric("📊 Total Columns", str(df.shape[1]))
nationalities_col.metric("🌍 Nationalities", str(df['nationality'].nunique()))
st.markdown("<hr>", unsafe_allow_html=True)

# ========== COLUMN DESCRIPTIONS ==========
# NOTE(review): no CSS rule for the `subheader` class is defined in this script —
# presumably the heading renders unstyled; confirm.
st.markdown("<div class='subheader'>📘 Column Descriptions</div>", unsafe_allow_html=True)

# Data dictionary (column name → description)
# Insertion order of the entries is the row order of the table shown below.
data_dict = {
    "name": "The player's full name.",
    "age": "The player’s age in years.",
    "nationality": "The country the player represents.",
    "club": "The football club where the player currently plays.",
    "position": "Detailed position(s) on the field (e.g., CM, RW, CDM).",
    "best_position": "The player’s main or most effective position.",
    "foot": "Preferred foot (Left or Right).",
    "height": "The player’s height in cm.",
    "weight": "The player’s weight (kg or lbs).",
    "over_all_rating": "The player’s overall rating.",
    "best_over_rating": "Best overall rating across all positions.",
    "potential": "Potential future rating (especially for young players).",
    "growth": "Difference between potential and current rating.",
    "intl_reputation": "International reputation (1–5 stars).",
    "total_stats": "Sum of all individual skill attributes.",
    "base_stats": "Core stats excluding advanced or positional ones.",
    "phys_index": "Custom physical ability index.",
    "skill_index": "Custom technical skill index.",
    "pace": "Speed and acceleration combined.",
    "shooting": "Finishing and shot power.",
    "passing": "Passing accuracy and vision.",
    "dribbling": "Ball control and agility.",
    "defense": "Tackling and marking ability.",
    "physical": "Strength, stamina, and jumping.",
    "cross": "Crossing accuracy.",
    "stamina": "Energy and endurance level.",
    "weak_foot": "Skill level with non-dominant foot (1–5).",
    "skill_moves": "Skill moves ability (1–5).",
    "attack_work_rate": "Player’s attacking effort (High/Medium/Low).",
    "defence_work_rate": "Defensive effort (High/Medium/Low).",
    "market_value": "Estimated player market value (€).",
    "contract": "Contract status or years left.",
    "hits": "Popularity or profile views on the platform."
}

# Convert dictionary to DataFrame
# One row per (column name, description) pair.
dict_df = pd.DataFrame(list(data_dict.items()), columns=["Column Name", "Description"])

# Display in a nice table
st.dataframe(dict_df, use_container_width=True)

def _subheader(title):
    """Emit a section heading wrapped in the page's ``subheader`` div."""
    st.markdown(f"<div class='subheader'>{title}</div>", unsafe_allow_html=True)


# ========== SAMPLE DATA ==========
_subheader("🧾 Sample Data")
st.dataframe(df, use_container_width=True)
st.markdown("<hr>", unsafe_allow_html=True)

# ========== Categorical COLUMN DESCRIPTIONS ==========
_subheader("📘 Some info to describe catgorical data")
categorical_summary = df.describe(include= 'object').round(2)
st.dataframe(categorical_summary)

# ========== FOOTER ==========
FOOTER_HTML = "<br><hr><center style='color:gray;'> Let's go to The Next Stage. </center>"
st.markdown(FOOTER_HTML, unsafe_allow_html=True)

def go_to(page):
    """Record a navigation to ``page`` in the session state.

    Raises the ``fade`` flag, pauses for 0.3 seconds and stores ``page`` under
    ``current_page``; the caller performs the actual page switch.
    """
    state = st.session_state
    state["fade"] = True
    sleep(0.3)
    state["current_page"] = page


# Back on the left, Next on the right; the wide middle column stays empty.
back_column, _middle, next_column = st.columns([1, 2, 1])

if back_column.button("⬅️ Back"):
    go_to("home_page.py")
    st.switch_page("home_page.py")

if next_column.button("➡️ Next"):
    go_to("pages/eda1.py")
    st.switch_page("pages/eda1.py")


Writing about.py


In [None]:
%%writefile eda1.py

import streamlit as st
from ydata_profiling import  ProfileReport
import matplotlib.pyplot as plt
import plotly.express as px 
from time import sleep
import seaborn as sns
import pandas as pd
import numpy as np

# Dataset backing every chart on this page.
df = pd.read_csv('pages/eda_df.csv')

# ========== PAGE TITLE ==========
# The literal opens with exactly three quotes; a fourth one becomes part of the
# string and is rendered as a stray '"' in front of the heading.
st.markdown("""<h4 style="color:white;text-align:center;">📊 Exploratory Data Analysis</h4>""", unsafe_allow_html=True)
st.markdown("<hr>", unsafe_allow_html=True)

# ========== BASIC INFO ==========
# Headline figures: numeric column count, object column count, distinct clubs.
col1, col2, col3 = st.columns(3)
col1.metric("🧍 Total Numrical Columns", f"{len(df.select_dtypes(include= 'number').columns)}")
col2.metric("📊 Total Categorical Columns", f"{len(df.select_dtypes(include= 'object').columns)}")
col3.metric("🌍 Club", f"{df['club'].nunique()}")

st.markdown("<hr>", unsafe_allow_html=True)

TAB_TITLES = ["📊 Univariate Analysis", "📈 Bivariate Analysis", "📉 Multivariate Analysis"]
tab_1, tab_2, tab_3 = st.tabs(TAB_TITLES)

with tab_1:
    st.markdown('<style> .center-div {color: red; text-align: center;} </style> <div class="center-div">Univariate Analysis</div>', unsafe_allow_html=True)

    # One figure per numeric column: histogram with KDE on the left, boxplot on the right.
    numeric_column_names = df.select_dtypes(include= 'number').columns
    for column_name in numeric_column_names:
        fig, (hist_ax, box_ax) = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))
        sns.histplot(df[column_name], kde=True, ax=hist_ax)
        sns.boxplot(x=df[column_name], ax=box_ax)
        st.pyplot(fig)
        # Release the figure once rendered so they do not pile up in memory.
        plt.close(fig)

    st.markdown("<hr>", unsafe_allow_html=True)

    # Distribution of each core attribute, split by the player's best position.
    for stat in ('pace', 'shooting', 'passing', 'dribbling', 'defense', 'physical'):
        position_box = px.box(
            df,
            x='best_position',
            y=stat,
            title=f'{stat.upper()} Distribution by Best_Position',
            color='best_position',
        )
        st.plotly_chart(position_box)

with tab_2:
    st.markdown('<style> .center-div {color: red; text-align: center;} </style> <div class="center-div">Bivariate Analysis</div>', unsafe_allow_html=True)

    report_button = st.button('Show Report')

    # The profiling report is only built on demand.
    if report_button:
        # `st_profile_report` was never imported in this script, so the original
        # call raised NameError. Imported here, next to its only use.
        from streamlit_pandas_profiling import st_profile_report

        report = ProfileReport(df, title = 'Player Market Value Prediction FIFA21')

        # The component renders itself; its return value is not meant for st.write.
        st_profile_report(report)

def _question(text):
    """Render one numbered question heading (left-aligned, red) in the multivariate tab."""
    st.markdown(
        "<style> .center-div1 {color: red; text-align: left;} </style> "
        f"<div class='center-div1'>{text}</div>",
        unsafe_allow_html=True)


with tab_3:
    st.markdown('<style> .center-div {color: red; text-align: center;} </style> <div class="center-div">Multivariate Analysis</div>', unsafe_allow_html=True)

    # --- 1. Correlation of every numeric feature with the target ---
    _question("1.What  is the feature most strongly correlate with target col player market value?")
    strong_corr = df.corr(numeric_only=True).drop('market_value')['market_value'].sort_values(ascending=False)
    top_corr = strong_corr.head(15)
    fig_1= px.bar(data_frame= strong_corr, x= top_corr.index, y= top_corr.values,
                  color= top_corr.index, title= "Top 15 Features Most Correlated with Player Market Value",
                  labels= {'x':'Feature', 'y':'Correlation Coefficient'})
    fig_1.update_layout(bargap=0.01, bargroupgap=0.05)
    st.plotly_chart(fig_1)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 2. Player count per nationality ---
    _question("2.What is the Top 15 Nationality in the Game?")
    # Plain column count instead of groupby.apply with a lambda; same result.
    nat_count_df = df.groupby('nationality')['name'].count().reset_index(name='counts')
    nat_count_df = nat_count_df.sort_values(by='counts', ascending= False)
    top_15 = nat_count_df[:15]
    fig_2 = px.bar(data_frame= top_15, x='nationality', y='counts', color='counts',
                   title = 'the Top 15 Nationality in the FIFA Game', text_auto= True)
    st.plotly_chart(fig_2)
    st.markdown("<div class='subheader'> note the most player in the game can more 50% from palyers from top seven</div>", unsafe_allow_html=True)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 3. Average rating vs. player count per nationality (>= 200 players) ---
    _question("3.Which nation has the most number of over performing players?")
    nat_df1 = df.groupby('nationality')['over_all_rating'].mean().reset_index(name='over_all_rating')
    nat_df2 = df.groupby('nationality')['over_all_rating'].count().reset_index(name='player_count')
    merge_df = pd.merge(nat_df1, nat_df2, how='inner',left_on='nationality',right_on='nationality')
    new_df = merge_df[merge_df['player_count'] >= 200].sort_values(by=['over_all_rating','player_count'],ascending=[False,False])
    fig_3 = px.scatter(new_df, x='over_all_rating', y='player_count', color='player_count', size='over_all_rating',
                        hover_data=['nationality'], title='nationality Player counts and Average Potential')
    st.plotly_chart(fig_3)
    st.markdown("<div class='subheader'> note England has produced 1660 players, and still is having an average of 63.56, while Brazil has the highest average Ratings among the players</div>", unsafe_allow_html=True)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 4. Average rating vs. player count per club (>= 25 players) ---
    _question("4.Which Team has the most number of Over Performing Players?")
    club_df1 = df.groupby('club')['over_all_rating'].mean().reset_index(name='over_all_rating')
    club_df2 = df.groupby('club')['over_all_rating'].count().reset_index(name='player_count')
    club_merge_df = pd.merge(club_df1, club_df2, how='inner',left_on='club',right_on='club')
    club_new_df = club_merge_df[club_merge_df['player_count'] >= 25].sort_values(by=['over_all_rating','player_count'],ascending=[False,False])
    fig_4 = px.scatter(club_new_df, x='over_all_rating', y='player_count', color='player_count', size='over_all_rating',
                        hover_data=['club'], title='club Player counts and Average Potential')
    st.plotly_chart(fig_4)
    st.markdown("""<div class='subheader'> note as Bayern_Munich The team which has the highest average rating among all
    the teams (81.24) from a set of 26 players. Another team is Manchester_United which has the highest average among the teams with 45 layers.
    They have an average of 75.866 on the 45 players. Another Team is Real_Madrid has second higest average of 79.28 on the 32 players</div>""", unsafe_allow_html=True)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 5. Player count per best position ---
    _question("5.which postion has the largest number of players?")
    best_df = df.groupby('best_position')['name'].count().reset_index(name='counts')
    best_df = best_df.sort_values(by='counts', ascending= False)
    top_15 = best_df[:15]
    fig_5 = px.bar(top_15, x='best_position', y='counts', color='counts', title='Player Postion counts in FIFA 21',
                    labels={'x':'position'}, text_auto= True)
    st.plotly_chart(fig_5)
    st.markdown("""<div class='subheader'> note as the most number of player population is for the Center Back Position,
    which is followed by Striker and The Central attacking midfielder positions in FIFA21 games.</div>""", unsafe_allow_html=True)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 6. Mean market value per position and preferred foot ---
    _question("6.Which positions tend to have the highest market values?")
    new_df1 = df.groupby(['best_position', 'foot'])['market_value'].mean().sort_values(ascending= False).reset_index()
    fig_6 = px.bar(data_frame= new_df1, x= 'best_position', y= 'market_value', color= 'foot', barmode='group')
    st.plotly_chart(fig_6)
    st.markdown("""<div class='subheader'> note as the highest player market value for (attacking Postion) in for Right Wing Position,
    which is followed by Striker(centre forward) and The Central attacking midfielder positions in FIFA21 games. </div> """, unsafe_allow_html=True)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 7. Total market value per nationality ---
    _question("7.Which nationalities produce the most valuable players?")
    new_df2 = df.groupby(['nationality'])['market_value'].sum().sort_values(ascending= False).reset_index()
    fig_7 = px.bar(data_frame= new_df2.head(15), x= 'nationality', y= 'market_value', color= 'nationality',
                   labels={'nationality':'Nationality', 'market_value':'The Most Valuable player '})
    st.plotly_chart(fig_7)
    st.markdown("""<div class='subheader'>note as the player higest market value Spain players, which is followed by Brazil Players.</div>""", unsafe_allow_html=True)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 8. Market value against age (scatter, then mean per age) ---
    _question("8.How does player value change with age?")
    fig_8 = px.scatter(data_frame= df, x='age', y='market_value', color='best_position', height= 700, width= 1200)
    new_df5 = df.groupby(['age'])['market_value'].mean().sort_values(ascending= False).reset_index()
    fig_9 = px.bar(data_frame= new_df5, x= 'age', y='market_value', color= 'age')
    st.plotly_chart(fig_8)
    st.plotly_chart(fig_9)
    st.markdown("<div class='subheader'>note the age the primairy key to controll player value and the higest market value almost age from 25 : 30 years old</div>", unsafe_allow_html=True)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 9. Overall rating against potential, colored by market value ---
    _question("9.What’s the relationship of player overall rating and potential ?")
    fig_10 = px.scatter(data_frame= df, x='over_all_rating', y='potential', color='market_value', height= 700, width= 1200)
    st.plotly_chart(fig_10)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 10. Top 100 players by overall rating ---
    _question("10.Who are the best players in the game?")
    # sort_values returns a new frame; sorting a column subset of `df` in place
    # triggers SettingWithCopyWarning.
    top_play = df[['name', 'over_all_rating', 'age','club','best_position']].sort_values(by='over_all_rating',ascending=False)
    top_100_play = top_play[:100]
    fig_11 = px.scatter(top_100_play, x='age', y='over_all_rating', color='age', size='over_all_rating',
                        hover_data=['name', 'club', 'best_position'], title='Top Football Players in the FIFA 21 game')
    st.plotly_chart(fig_11)
    st.markdown("<div class='subheader'>the 2 top player in fifa 2021 Ronaldo in Juventus & Messi in Barcelona</div>", unsafe_allow_html=True)
    st.markdown("<hr>", unsafe_allow_html=True)

    # --- 11. Highest-rated player(s) for every position ---
    _question("11.Best Over_all Team in FiFA 2021 ")
    final_team = df[['name', 'age', 'over_all_rating','best_position','club']].sort_values(by='age')
    pos_play = final_team.groupby('best_position')['over_all_rating'].max().reset_index(name='Overall Score')
    # Inner join keeps only the rows whose rating equals the maximum of their position.
    player_pos = pd.merge(final_team, pos_play, how='inner', left_on=['best_position','over_all_rating'],
                            right_on=['best_position','Overall Score'])
    pos_best = player_pos[['name', 'club', 'age', 'best_position', 'Overall Score']]
    st.dataframe(pos_best)
    st.image('images/players1.png')
    st.markdown("<hr>", unsafe_allow_html=True)


    # --- 12. Highest-potential player(s) for every position ---
    _question("12.the Best Team with the players with the highest potential ")
    pot_team = df[['name', 'age', 'potential','best_position','club']].sort_values(by='age')
    pos_df = pot_team.groupby('best_position')['potential'].max().reset_index(name='potential')
    new_pot_df = pd.merge(pot_team, pos_df, how='inner', left_on=['best_position','potential'],
                        right_on=['best_position','potential'])
    pot_df = new_pot_df[['name', 'club', 'age', 'best_position', 'potential']]
    cm = sns.light_palette("black", as_cmap=True)
    # The Styler used to be built and then discarded; hand it to st.dataframe so
    # the gradient is actually displayed.
    st.dataframe(pot_df[0:15].style.background_gradient(cmap=cm))
    st.image('images/players.png')
    st.markdown("<hr>", unsafe_allow_html=True)

def go_to(page):
    """Record a navigation to ``page`` in the session state.

    Raises the ``fade`` flag, pauses for 0.3 seconds and stores ``page`` under
    ``current_page``; the caller performs the actual page switch.
    """
    st.session_state["fade"] = True
    sleep(0.3)
    st.session_state["current_page"] = page
    
nav1, nav2, nav3 = st.columns([1, 2, 1])
with nav1:
    if st.button("⬅️ Back"):
        go_to("pages/about.py")
        st.switch_page("pages/about.py")

with nav3:
    if st.button("➡️ Next"):
        # Forward slash, as in every other page path: "pages\modeling.py" holds an
        # invalid "\m" escape and only resolves where "\" is a path separator.
        go_to("pages/modeling.py")
        st.switch_page("pages/modeling.py")


Writing eda1.py


In [None]:
%%writefile modeling.py

from streamlit_pandas_profiling import st_profile_report
from sklearn.base import BaseEstimator, TransformerMixin
from ydata_profiling import  ProfileReport
import streamlit as st
from time import sleep
import pandas as pd
import numpy as np
import joblib
import base64
import sys
from custom_transformers import LogTransformer, Handle_outliers_lb_ub, FrequencyEncoder

import types
# Expose the custom transformers as attributes of the `__main__` module.
# NOTE(review): presumably needed so the pickled pipeline loaded further down can
# resolve classes that were defined in `__main__` when it was saved — confirm.
sys.modules['__main__'].LogTransformer = LogTransformer
sys.modules['__main__'].Handle_outliers_lb_ub = Handle_outliers_lb_ub
sys.modules['__main__'].FrequencyEncoder = FrequencyEncoder

# ========== PAGE TITLE ==========
# The literal opens with exactly three quotes; a fourth one becomes part of the
# string and is rendered as a stray '"' in front of the heading.
st.markdown("""<h4 style="color:white;text-align:center;">🧠 Prediction Model</h4>""", unsafe_allow_html=True)
st.markdown("<hr>", unsafe_allow_html=True)

# Cleaned dataset: previewed here and used below to populate the input widgets.
cl_df = pd.read_csv('pages/cleaned_df.csv')
st.dataframe(cl_df.head(10))
st.markdown("<hr>", unsafe_allow_html=True)

def _range_slider(label, column, step=1):
    """Slider whose bounds are the observed min/max of ``column`` in ``cl_df``."""
    observed = cl_df[column]
    return st.slider(label, min_value=observed.min(), max_value=observed.max(), step=step)


# --- Categorical inputs ---
nationality = st.selectbox('Nationality', cl_df.nationality.unique())
club = st.selectbox('Club', cl_df.club.unique())
best_position = st.selectbox('Best Position', cl_df.best_position.unique())
foot = st.radio('Foot', ['Right', 'Left'])
# Labels corrected from "Work Date": these widgets feed the *_work_rate features.
attack_work_rate = st.radio('Attack Work Rate', ['Low', 'Medium', 'High'])
defence_work_rate = st.radio('Defence Work Rate', ['Low', 'Medium', 'High'])

# --- Free numeric inputs ---
height = st.number_input('Height', step = 0.1)
weight = st.number_input('Weight', step = 0.1)
contract = st.number_input('Contract', step = 1)
weak_foot = st.number_input('Weak Foot', step = 1)
skill_moves = st.number_input('Skill Moves', step = 1)
intl_reputation = st.number_input('Intl Reputation', step = 1)

# --- Numeric inputs bounded by the range seen in the cleaned dataset ---
age = _range_slider('Player Age', 'age')
over_all_rating = _range_slider(' Palyer All Over Rating', 'over_all_rating')
potential = _range_slider('Potential', 'potential')
cross = _range_slider('Cross', 'cross')
stamina = _range_slider('Stamina', 'stamina')
total_stats = _range_slider('Total Stats', 'total_stats')
pace = _range_slider('Pace', 'pace')
shooting = _range_slider('Shooting', 'shooting')
passing = _range_slider('Passing', 'passing')
dribbling = _range_slider('Dribbling', 'dribbling')
defense = _range_slider('Defense', 'defense')
physical = _range_slider('Physical', 'physical')
phys_index = _range_slider('Physics Index', 'phys_index', step=0.1)
skill_index = _range_slider('Skill Index', 'skill_index', step=0.1)
st.markdown("<hr>", unsafe_allow_html=True)

@st.cache_resource
def _load_model(path):
    """Load the persisted model once and reuse it across script reruns.

    Without the cache the file is unpickled again on every widget interaction.
    NOTE: ``joblib.load`` unpickles arbitrary Python objects — only point it at
    a trusted file.
    """
    return joblib.load(path)


my_model = _load_model('pages/Catboost.pkl')

# Single-row frame built from the widget values; keys are the feature names
# passed to the model.
new_data = pd.DataFrame([{
    'age': age,
    'over_all_rating': over_all_rating,
    'nationality': nationality,
    'club': club,
    'best_position': best_position,
    'potential': potential,
    'height': height,
    'weight': weight,
    'foot': foot,
    'contract': contract,
    'cross': cross,
    'stamina': stamina,
    'total_stats': total_stats,
    'weak_foot': weak_foot,
    'skill_moves': skill_moves,
    'attack_work_rate': attack_work_rate,
    'defence_work_rate': defence_work_rate,
    'intl_reputation': intl_reputation,
    'pace': pace,
    'shooting': shooting,
    'passing': passing,
    'dribbling': dribbling,
    'defense': defense,
    'physical': physical,
    'phys_index': phys_index,
    'skill_index': skill_index }])

# Predict only when the button is pressed.
predict_button = st.button('Predicted Player Market Value')
if predict_button:
    result = my_model.predict(new_data).round(1)[0]
    st.write(f"💰 Predicted Player Market Value: {result:,.0f} €")

def go_to(page):
    """Record a navigation to ``page`` in the session state.

    Raises the ``fade`` flag, pauses for 0.3 seconds and stores ``page`` under
    ``current_page``; the caller performs the actual page switch.
    """
    st.session_state["fade"] = True
    sleep(0.3)
    st.session_state["current_page"] = page
    
nav1, nav2, nav3 = st.columns([1, 2, 1])
with nav1:
    if st.button("⬅️ Back"):
        # Forward slash, as in every other page path: "pages\eda1.py" holds an
        # invalid "\e" escape and only resolves where "\" is a path separator.
        go_to("pages/eda1.py")
        st.switch_page("pages/eda1.py")

with nav3:
    if st.button("➡️ Next"):
        go_to("pages/presntation.py")
        st.switch_page("pages/presntation.py")

Writing modeling.py


In [None]:
%%writefile presntation.py

import streamlit as st
from time import sleep

# ======= PAGE CONFIG =======
# Wide layout with the sidebar collapsed on first load.
st.set_page_config(
    page_title="FIFA 21 Market Value Prediction",
    page_icon="⚽",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# ======= CUSTOM STYLES =======
# Style sheet for this page: dark background, heading colors, the animated
# `.slide-box` card that wraps each slide, `.list-box` bullet panels, `.point`
# text sizing and the `.footer` line.
st.markdown("""
<style>
body {
    background-color: #0E1117;
    color: #FFFFFF;
}
h1, h2 {
    color: #F9A825;
    text-align: center;
}
h3 {
    color: #FFB300;
    margin-bottom: 0.4em;
}
.slide-box {
    background-color: rgba(255, 255, 255, 0.05);
    border-radius: 15px;
    padding: 2.5rem;
    margin-top: 1.5rem;
    box-shadow: 0px 3px 12px rgba(255, 186, 8, 0.15);
    animation: fadeIn 0.8s ease-in-out;
}
@keyframes fadeIn {
    from {opacity: 0; transform: translateY(15px);}
    to {opacity: 1; transform: translateY(0);}
}
.list-box {
    background-color: rgba(255, 255, 255, 0.07);
    border-left: 4px solid #FFB300;
    padding: 1rem;
    border-radius: 10px;
    margin-bottom: 1rem;
}
.point {
    font-size: 18px;
    line-height: 1.6em;
}
.footer {
    text-align: center;
    color: #BDBDBD;
    margin-top: 40px;
}
</style>
""", unsafe_allow_html=True)


# ======= SLIDES =======
# Slide deck, in display order. Every entry is a dict with:
#   "emoji"   - icon placed in front of the title,
#   "title"   - heading text,
#   "content" - HTML fragment rendered inside the `.slide-box` card.
# The entry shown is selected by `st.session_state.slide_index`.
slides = [
    {
        "emoji": "⚽",
        "title": "FIFA 21 Player Market Value Prediction",
        "content": """
        <h3>Project Overview</h3>
        <div class="list-box">
        - Complete analysis of FIFA 21 dataset. <br>
        - Data preprocessing, transformation & cleaning. <br>
        - AI model (CatBoost) for predicting player market value (€). <br>
        - Interactive Streamlit dashboard with insights.
        </div>
        <p style='text-align:center; color:#BBBBBB; font-style:italic;'>Uncover what makes a footballer worth millions! 🧠</p>
        """
    },
    {
        "emoji": "🧹",
        "title": "Data Cleaning Overview",
        "content": """
        <div class="list-box">
        - Removal of unnecessary columns (70+ dropped). <br>
        - Converted height from feet→cm & weight from lbs→kg. <br>
        - Transformed 'value', 'wage', and 'release clause' from € strings to numbers. <br>
        - Dropped missing-heavy columns: <b>'team & contract'</b>, <b>'loan date end'</b>. <br>
        - Cleaned rating columns ('W/F', 'SM', 'IR') → numeric stars (1–5). <br>
        - Applied <b>Log transformation</b> for skewed financial features.
        </div>
        <p style='text-align:center; color:#A5D6A7;'>✅ Clean, consistent, and ready for modeling!</p>
        """
    },
    {
        "emoji": "🧩",
        "title": "Detailed Feature Processing",
        "content": """
        <div class="list-box">
        <h3>Numeric Transformations</h3>
        - Height → <b>height (cm)</b><br>
        - Weight → <b>weight (kg)</b><br>
        - Financials → <b>value, wage, release clause</b> (converted from €, K, M)<br><br>
        <h3>Categorical Transformations</h3>
        - Rating columns → <b>W/F, SM, IR</b> converted to numeric.<br>
        - Removed redundant visuals: <b>player photo, club logo, flag photo</b>.<br><br>
        <h3>Dropped Positional Attributes</h3>
        - Attack, defense & GK role duplicates:<br>
        <i>LS, ST, LW, RW, CAM, CB, GK reflexes...</i><br>
        </div>
        <p style='text-align:center; color:#FFF59D;'>⚙️ Each column prepared with precision for model input.</p>
        """
    },
    {
        "emoji": "📊",
        "title": "EDA Correlation Analysis",
        "content": """
        <div class="list-box">
        - Strong correlation found between <b>total stats</b>, <b>base stats</b>, and <b>market value</b>.<br>
        - Offensive features (dribbling, finishing, short passing) dominate influence.<br>
        - Defensive and GK stats show low predictive power.<br>
        - Age group <b>25–30</b> peaks in market value.<br>
        </div>
        <p style='text-align:center; color:#81D4FA;'>📈 Skill attributes explain player worth better than physique.</p>
        """
    },
    {
        "emoji": "🌍",
        "title": "EDA Insights & Patterns",
        "content": """
        <div class="list-box">
        - Spain 🇪🇸 & Brazil 🇧🇷 lead total market value distribution. <br>
        - Attackers (ST, CAM, RW) dominate top-value clusters. <br>
        - Age vs Value → mid-career players more valuable. <br>
        - Visualization tools: <b>Seaborn, Plotly, Matplotlib</b>.<br>
        </div>
        <p style='text-align:center; color:#FFB74D;'>📊 EDA revealed the story behind football’s economics!</p>
        """
    },
    {
        "emoji": "🤖",
        "title": "Model Development",
        "content": """
        <div class="list-box">
        - Models tested: Linear Regression, Random Forest, CatBoost. <br>
        - <b>CatBoost</b> chosen for superior handling of categorical data. <br>
        - Achieved R² ≈ 0.90 with low RMSE. <br>
        - Combined preprocessing + model pipeline.
        </div>
        <p style='text-align:center; color:#FFD54F;'>🏆 CatBoost takes the trophy home!</p>
        """
    },
    {
        "emoji": "🚀",
        "title": "Key Insights & Future Work",
        "content": """
        <div class="list-box">
        - Skill metrics dominate over physical ones in impact. <br>
        - International reputation consistently adds value. <br>
        - Future work: Add FIFA 22/23 data, use SHAP explainability, deploy cloud app.
        </div>
        <p style='text-align:center; color:#FF7043;'>⚽ Data meets football — and the game becomes smarter!</p>
        """
    }
]

# ======= STATE =======
# Initialise the slide pointer once; the original repeated this check before the
# navigation section and mixed attribute and key access to the same entry.
if "slide_index" not in st.session_state:
    st.session_state["slide_index"] = 0

current_slide = st.session_state["slide_index"]
slide = slides[current_slide]

# ======= DISPLAY =======
st.markdown(f"<h1>{slide['emoji']} {slide['title']}</h1>", unsafe_allow_html=True)
st.markdown(f"<div class='slide-box'>{slide['content']}</div>", unsafe_allow_html=True)

# ======= NAVIGATION =======
col1, col2, col3 = st.columns([1, 2, 1])

with col1:
    # Keys carry the slide number so each slide gets its own button identity.
    if st.button("⬅️ Previous", key=f"prev_{current_slide}"):
        # Clamp at the first slide, then rerun so the new slide is drawn.
        st.session_state["slide_index"] = max(0, current_slide - 1)
        st.rerun()

with col2:
    st.markdown(
        f"<p style='text-align:center; color:#999;'>Slide {current_slide+1}/{len(slides)}</p>",
        unsafe_allow_html=True)

with col3:
    if st.button("➡️ Next", key=f"next_{current_slide}"):
        # Clamp at the last slide, then rerun so the new slide is drawn.
        st.session_state["slide_index"] = min(len(slides) - 1, current_slide + 1)
        st.rerun()

def go_to(page):
    """Record a navigation to ``page`` in the session state.

    Raises the ``fade`` flag, pauses for 0.3 seconds and stores ``page`` under
    ``current_page``; the caller performs the actual page switch.
    """
    st.session_state["fade"] = True
    sleep(0.3)
    st.session_state["current_page"] = page
    
nav1, nav2, nav3 = st.columns([1, 2, 1])
with nav1:
    if st.button("⬅️ Back"):
        # Forward slash, as in every other page path: "pages\modeling.py" holds an
        # invalid "\m" escape and only resolves where "\" is a path separator.
        go_to("pages/modeling.py")
        st.switch_page("pages/modeling.py")

with nav3:
    if st.button("➡️ Next"):
        go_to("home_page.py")
        st.switch_page("home_page.py")

# ======= FOOTER =======
st.markdown("<p class='footer'>📊 FIFA 21 Market Value Analysis — Crafted by Samir Masoud</p>", unsafe_allow_html=True)


Writing presntation.py


In [None]:
# Run the presentation script on its own; the cell keeps running until it is interrupted.
# NOTE(review): its Back/Next buttons switch to `pages/modeling.py` and `home_page.py`,
# which presumably only resolve when it is served as a page of `home_page.py` — confirm.
! streamlit run presntation.py

^C


In [3]:
# Generate requirements.txt for the current directory with the pipreqs command-line tool.
import pipreqs
! pipreqs .

INFO: Not scanning for jupyter notebooks.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
INFO: Successfully saved requirements file in .\requirements.txt
