In [7]:
import pandas as pd

# Load dataset (nba.csv) from its remote URL
df = pd.read_csv("https://media.geeksforgeeks.org/wp-content/uploads/nba.csv")

# Data Loading & Familiarization
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
df.info()
# Count of missing values per column
print(df.isnull().sum())


            Name            Team  Number Position   Age Height  Weight  \
0  Avery Bradley  Boston Celtics     0.0       PG  25.0    6-2   180.0   
1    Jae Crowder  Boston Celtics    99.0       SF  25.0    6-6   235.0   
2   John Holland  Boston Celtics    30.0       SG  27.0    6-5   205.0   
3    R.J. Hunter  Boston Celtics    28.0       SG  22.0    6-5   185.0   
4  Jonas Jerebko  Boston Celtics     8.0       PF  29.0   6-10   231.0   

             College     Salary  
0              Texas  7730337.0  
1          Marquette  6796117.0  
2  Boston University        NaN  
3      Georgia State  1148640.0  
4                NaN  5000000.0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 458 entries, 0 to 457
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      457 non-null    object 
 1   Team      457 non-null    object 
 2   Number    457 non-null    float64
 3   Position  457 non-null    object 
 4   Age     

In [8]:
def describe_dataset(dataframe):
    """Summarize the structure of a DataFrame.

    Returns a dict with three entries:
      - "shape": the frame's ``shape`` tuple,
      - "columns": the column labels as a list,
      - "data_types": a dict built from ``dataframe.dtypes``
        (column label -> dtype).
    """
    summary = {}
    summary["shape"] = dataframe.shape
    summary["columns"] = [*dataframe.columns]
    summary["data_types"] = dict(dataframe.dtypes)
    return summary

# Summarize the loaded frame; as the cell's last expression the dict is displayed
describe_dataset(df)


{'shape': (458, 9),
 'columns': ['Name',
  'Team',
  'Number',
  'Position',
  'Age',
  'Height',
  'Weight',
  'College',
  'Salary'],
 'data_types': {'Name': dtype('O'),
  'Team': dtype('O'),
  'Number': dtype('float64'),
  'Position': dtype('O'),
  'Age': dtype('float64'),
  'Height': dtype('O'),
  'Weight': dtype('float64'),
  'College': dtype('O'),
  'Salary': dtype('float64')}}

In [15]:
# Functional Cleaning & Preprocessing
def remove_incomplete_rows(dataframe):
    """Return a new DataFrame containing only the rows with no missing values.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with every row that holds at least one null value
        removed and the index renumbered from 0.

    Notes
    -----
    ``dropna`` is used instead of rebuilding the frame from a filtered list
    of row dicts: it keeps the original columns (and their dtypes) even when
    no row survives, whereas ``pd.DataFrame([])`` would come back with no
    columns at all and break later column lookups.
    """
    # reset_index(drop=True) yields a fresh 0..n-1 index, matching what a
    # frame rebuilt from a list of records would have.
    return dataframe.dropna(how='any').reset_index(drop=True)

# Build a new frame from df that keeps only the rows without null values
cleaned_df = remove_incomplete_rows(df)

# Print the first rows of the cleaned data
print(cleaned_df.head())

            Name            Team  Number Position   Age Height  Weight  \
0  Avery Bradley  Boston Celtics     0.0       PG  25.0    6-2   180.0   
1    Jae Crowder  Boston Celtics    99.0       SF  25.0    6-6   235.0   
2    R.J. Hunter  Boston Celtics    28.0       SG  22.0    6-5   185.0   
3  Jordan Mickey  Boston Celtics    55.0       PF  21.0    6-8   235.0   
4   Kelly Olynyk  Boston Celtics    41.0        C  25.0    7-0   238.0   

         College     Salary  
0          Texas  7730337.0  
1      Marquette  6796117.0  
2  Georgia State  1148640.0  
3            LSU  1170960.0  
4        Gonzaga  2165160.0  


In [16]:
def normalize_text_columns(dataframe, columns=('Name', 'Team')):
    """Return a copy of the frame with the given text columns trimmed and lower-cased.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input frame; it is not modified.
    columns : str or iterable of str, optional
        Column label(s) to normalize. Defaults to ``('Name', 'Team')``.
        A single string is treated as one column label.

    Returns
    -------
    pandas.DataFrame
        A copy in which every ``str`` value of the selected columns has had
        surrounding whitespace stripped and been lower-cased. Non-string
        values (e.g. NaN) are left as they are.

    Raises
    ------
    KeyError
        If a requested column is not present in the frame.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(columns, str):
        columns = (columns,)

    new_df = dataframe.copy()
    for col in columns:
        new_df[col] = new_df[col].map(
            lambda x: x.strip().lower() if isinstance(x, str) else x
        )
    return new_df

# Trim and lower-case the 'Name' and 'Team' text of the cleaned frame
normalized_df = normalize_text_columns(cleaned_df)

# Print the first rows of the normalized output
print(normalized_df.head())


            Name            Team  Number Position   Age Height  Weight  \
0  avery bradley  boston celtics     0.0       PG  25.0    6-2   180.0   
1    jae crowder  boston celtics    99.0       SF  25.0    6-6   235.0   
2    r.j. hunter  boston celtics    28.0       SG  22.0    6-5   185.0   
3  jordan mickey  boston celtics    55.0       PF  21.0    6-8   235.0   
4   kelly olynyk  boston celtics    41.0        C  25.0    7-0   238.0   

         College     Salary  
0          Texas  7730337.0  
1      Marquette  6796117.0  
2  Georgia State  1148640.0  
3            LSU  1170960.0  
4        Gonzaga  2165160.0  


In [11]:
# Imperative version: collect the upper-cased form of every string name
names_upper = []
for value in df['Name']:
    if not isinstance(value, str):
        continue  # skip non-string entries such as missing names
    names_upper.append(value.upper())


In [12]:
# Functional version: keep only string names, then upper-case each of them
names_upper = list(
    map(
        str.upper,
        filter(lambda value: isinstance(value, str), df['Name']),
    )
)


In [13]:
# Coerce 'Salary' to a numeric dtype; values that cannot be parsed become NaN
normalized_df['Salary'] = pd.to_numeric(normalized_df['Salary'], errors='coerce')

# Mean salary per team, collected into a plain {team: mean} dict
average_salary_by_team = (
    normalized_df
    .groupby('Team')['Salary']
    .mean()
    .to_dict()
)
average_salary_by_team


{'atlanta hawks': 5125754.545454546,
 'boston celtics': 3461755.6666666665,
 'brooklyn nets': 3654086.5384615385,
 'charlotte hornets': 3978124.153846154,
 'chicago bulls': 6105483.333333333,
 'cleveland cavaliers': 6733471.7272727275,
 'dallas mavericks': 4761692.083333333,
 'denver nuggets': 4240885.666666667,
 'detroit pistons': 4477884.2,
 'golden state warriors': 6711293.083333333,
 'houston rockets': 4404132.363636363,
 'indiana pacers': 4004800.0833333335,
 'los angeles clippers': 6707636.714285715,
 'los angeles lakers': 3270444.8333333335,
 'memphis grizzlies': 4374067.692307692,
 'miami heat': 6583812.1,
 'milwaukee bucks': 4771039.785714285,
 'minnesota timberwolves': 2663774.5,
 'new orleans pelicans': 4262525.5,
 'new york knicks': 5270654.7272727275,
 'oklahoma city thunder': 5822521.285714285,
 'orlando magic': 3788728.8,
 'philadelphia 76ers': 2213778.1428571427,
 'phoenix suns': 3457318.076923077,
 'portland trail blazers': 3220121.2,
 'sacramento kings': 4829759.53846

In [14]:
def add_salary_bracket(dataframe, threshold=5_000_000):
    """Return a copy of the frame with a 'salary_bracket' column added.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input frame with a numeric 'Salary' column; it is not modified.
    threshold : int or float, optional
        Salaries greater than or equal to this value are labelled 'high',
        all others 'low'. Defaults to 5_000_000.

    Returns
    -------
    pandas.DataFrame
        A copy with the extra 'salary_bracket' column. Rows whose salary is
        missing keep a missing bracket instead of being labelled 'low'
        (a NaN compares False against any threshold, which would otherwise
        silently put unknown salaries in the 'low' bracket).

    Raises
    ------
    KeyError
        If the frame has no 'Salary' column.
    """
    new_df = dataframe.copy()
    salaries = new_df['Salary']
    brackets = salaries.ge(threshold).map({True: 'high', False: 'low'})
    # Blank out the label wherever the salary itself is unknown.
    new_df['salary_bracket'] = brackets.where(salaries.notna())
    return new_df

# Label every row of the normalized frame with its salary bracket
final_df = add_salary_bracket(normalized_df)
# Show name, salary and bracket for the first rows
final_df[['Name', 'Salary', 'salary_bracket']].head()


Unnamed: 0,Name,Salary,salary_bracket
0,avery bradley,7730337.0,high
1,jae crowder,6796117.0,high
2,r.j. hunter,1148640.0,low
3,jordan mickey,1170960.0,low
4,kelly olynyk,2165160.0,low
