In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import jarque_bera, normaltest, ttest_1samp
from statsmodels.stats.stattools import durbin_watson

# Load your dataset from a CSV file
csv_file_path = "/content/drive/MyDrive/Dataset/Final Dataset Fifa/final-mf.csv"

# Load the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Print the column names to verify their existence
print(df.columns)


Index(['Age', 'Potential', 'Value', 'Crossing', 'ShortPassing', 'Dribbling',
       'LongPassing', 'BallControl', 'SprintSpeed', 'LongShots', 'Aggression',
       'Vision', 'BMI', 'Work Rate_NUM'],
      dtype='object')


In [None]:
# List of numerical columns to perform tests on
numerical_columns = ['Age', 'Potential', 'Value', 'Crossing', 'ShortPassing', 'Dribbling',
       'LongPassing', 'BallControl', 'SprintSpeed', 'LongShots', 'Aggression',
       'Vision', 'BMI', 'Work Rate_NUM']

# Iterate through each numerical column
for col in numerical_columns:
    data = df[col]

    # JB test
    jb_stat, jb_p_value = jarque_bera(data)
    print(f'Jarque-Bera Test for {col}:')
    print(f'  JB Test Statistic: {jb_stat}')
    print(f'  JB P-Value: {jb_p_value}')

    # P-value test
    p_value_stat, p_value = normaltest(data)
    print(f'P-Value Test for {col}:')
    print(f'  P-Value Test Statistic: {p_value_stat}')
    print(f'  P-Value: {p_value}')

    # T-test (one-sample T-test against a known population mean)
    population_mean = 0  # Replace with your known population mean
    t_stat, t_p_value = ttest_1samp(data, population_mean)
    print(f'T-Test for {col}:')
    print(f'  T-Test Statistic: {t_stat}')
    print(f'  T-Test P-Value: {t_p_value}')

    # Standard error
    std_error = np.std(data, ddof=1) / np.sqrt(len(data))
    print(f'Standard Error for {col}: {std_error}')

    # DW test
    # Assuming your data is time series data, you can perform the DW test
    dw_stat = durbin_watson(data)
    print(f'Durbin-Watson Test for {col}:')
    print(f'  Durbin-Watson Statistic: {dw_stat}')


Jarque-Bera Test for Age:
  JB Test Statistic: 204.88264287596152
  JB P-Value: 3.238168029023175e-45
P-Value Test for Age:
  P-Value Test Statistic: 282.5518110480175
  P-Value: 4.412185926615888e-62
T-Test for Age:
  T-Test Statistic: 417.04530291656596
  T-Test P-Value: 0.0
Standard Error for Age: 0.06145721457907089
Durbin-Watson Test for Age:
  Durbin-Watson Statistic: 0.0443325740770137
Jarque-Bera Test for Potential:
  JB Test Statistic: 39.46691603460913
  JB P-Value: 2.6907173196393845e-09
P-Value Test for Potential:
  P-Value Test Statistic: 38.81639520998726
  P-Value: 3.7250051409552224e-09
T-Test for Potential:
  T-Test Statistic: 946.7487520289654
  T-Test P-Value: 0.0
Standard Error for Potential: 0.07724376981095785
Durbin-Watson Test for Potential:
  Durbin-Watson Statistic: 0.008827346922991051
Jarque-Bera Test for Value:
  JB Test Statistic: 543460.3719265755
  JB P-Value: 0.0
P-Value Test for Value:
  P-Value Test Statistic: 6257.805606727911
  P-Value: 0.0
T-Test f

In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Load your dataset (replace 'your_dataset.csv' with the actual file path)
data = pd.read_csv('final-mf.csv')

# Extract the predictor variables (independent variables) for which you want to check multicollinearity
# Replace 'X1', 'X2', 'X3', etc. with the actual column names of your predictor variables
X = data[['Age', 'Potential', 'Value', 'Crossing', 'ShortPassing', 'Dribbling',
       'LongPassing', 'BallControl', 'SprintSpeed', 'LongShots', 'Aggression',
       'Vision', 'BMI', 'Work Rate_NUM']]

# Add a constant (intercept) term to the predictor variables
X['intercept'] = 1

# Calculate VIF values
vif = pd.DataFrame()
vif["Variable"] = X.columns
vif["VIF"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]

# Display the VIF values
print(vif)


         Variable         VIF
0             Age    3.191567
1       Potential    4.124465
2           Value    1.786369
3        Crossing    2.435176
4    ShortPassing    8.461844
5       Dribbling    5.965182
6     LongPassing    5.419795
7     BallControl    8.917348
8     SprintSpeed    1.875234
9       LongShots    2.396707
10     Aggression    1.602598
11         Vision    4.311338
12            BMI    1.086614
13  Work Rate_NUM    1.140431
14      intercept  838.326875
