In [7]:
import pandas as pd

# Read the CSV file with specified data types
df = pd.read_csv("FIFA23_official_data.csv", dtype={'Preferred Foot': str, 'Work Rate': str})

# Clean up 'Best Overall Rating' column
df['Best Overall Rating'] = df['Best Overall Rating'].str.extract(r'(\d+)').astype(float)

# Get the total number of rows and columns
total_rows = df.shape[0]
total_columns = df.shape[1]

# Print the results
print("Total rows:", total_rows)
print("Total columns:", total_columns)


Total rows: 17660
Total columns: 29


In [8]:
print(df.iloc[0])

ID                                                                    209658
Name                                                             L. Goretzka
Age                                                                       27
Photo                       https://cdn.sofifa.net/players/209/658/23_60.png
Nationality                                                          Germany
Flag                                     https://cdn.sofifa.net/flags/de.png
Overall                                                                   87
Potential                                                                 88
Club                                                       FC Bayern München
Club Logo                             https://cdn.sofifa.net/teams/21/30.png
Value                                                                   €91M
Wage                                                                   €115K
Special                                                                 2312

In [9]:
# Drop columns that are not needed
columns_to_drop = ['Best Overall Rating', 'Loaned From']
df.drop(columns_to_drop, axis=1, inplace=True)
print(df.isnull().sum()) #check if Nan value still dey


ID                             0
Name                           0
Age                            0
Photo                          0
Nationality                    0
Flag                           0
Overall                        0
Potential                      0
Club                         211
Club Logo                      0
Value                          0
Wage                           0
Special                        0
Preferred Foot                 0
International Reputation       0
Weak Foot                      0
Skill Moves                    0
Work Rate                      0
Body Type                     38
Real Face                     38
Position                      35
Joined                      1098
Contract Valid Until         361
Height                         0
Weight                         0
Release Clause              1151
Kit Number                    35
dtype: int64


In [10]:
df['Club'].fillna('Unknown', inplace=True)  # Replace NaN in 'Club' with 'Unknown'
df['Body Type'].fillna('Unknown', inplace=True)  # Replace NaN in 'Body Type' with 'Unknown'
df['Real Face'].fillna('Unknown', inplace=True)  # Replace NaN in 'Real Face' with 'Unknown'
df['Position'].fillna('Unknown', inplace=True)  # Replace NaN in 'Position' with 'Unknown'
df['Joined'].fillna('Unknown', inplace=True)  # Replace NaN in 'Joined' with 'Unknown'
df['Contract Valid Until'].fillna('Unknown', inplace=True)  # Replace NaN in 'Contract Valid Until' with 'Unknown'
df['Release Clause'].fillna('Unknown', inplace=True)
df['Kit Number'].fillna('Unknown', inplace=True)
print(df.isnull().sum())

ID                          0
Name                        0
Age                         0
Photo                       0
Nationality                 0
Flag                        0
Overall                     0
Potential                   0
Club                        0
Club Logo                   0
Value                       0
Wage                        0
Special                     0
Preferred Foot              0
International Reputation    0
Weak Foot                   0
Skill Moves                 0
Work Rate                   0
Body Type                   0
Real Face                   0
Position                    0
Joined                      0
Contract Valid Until        0
Height                      0
Weight                      0
Release Clause              0
Kit Number                  0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Club'].fillna('Unknown', inplace=True)  # Replace NaN in 'Club' with 'Unknown'
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Body Type'].fillna('Unknown', inplace=True)  # Replace NaN in 'Body Type' with 'Unknown'
The behavior will change in pandas 3.0. This inplace met