# Pandas Basics

## Create a DataFrame

In [6]:
import pandas as pd
import numpy as np

# Generate random data from a seeded generator so the sample frame is
# identical on every Restart Kernel -> Run All.
SEED = 42
N_ROWS = 1000
rng = np.random.default_rng(SEED)

# The draws below happen in dict order, so reordering the entries changes the
# values each column receives for a given seed.
data = {
    'int_column': rng.integers(0, 100, N_ROWS),
    'float_column': rng.random(N_ROWS),
    'str_column': rng.choice(['A', 'B', 'C', 'D'], N_ROWS),
    'bool_column': rng.choice([True, False], N_ROWS),
    'date_column': pd.date_range(start='1/1/2022', periods=N_ROWS, freq='D'),
    'category_column': pd.Categorical(rng.choice(['cat1', 'cat2', 'cat3'], N_ROWS)),
    'object_column': rng.choice(['obj1', 'obj2', 'obj3'], N_ROWS),
    'timedelta_column': pd.to_timedelta(rng.integers(1, 1000, N_ROWS), unit='s'),
    'complex_column': rng.standard_normal(N_ROWS) + 1j * rng.standard_normal(N_ROWS),
    'none_column': [None] * N_ROWS
}

# Create DataFrame (one row per generated sample, one column per dict key)
df = pd.DataFrame(data)

# Display the first rows of the DataFrame
df.head()

Unnamed: 0,int_column,float_column,str_column,bool_column,date_column,category_column,object_column,timedelta_column,complex_column,none_column
0,53,0.799906,C,True,2022-01-01,cat2,obj3,0 days 00:12:45,-0.951502+0.553729j,
1,37,0.580253,A,False,2022-01-02,cat3,obj2,0 days 00:11:38,-0.367660+0.827286j,
2,26,0.905094,D,True,2022-01-03,cat2,obj2,0 days 00:11:48,-0.373460+0.417391j,
3,79,0.485217,C,True,2022-01-04,cat3,obj1,0 days 00:11:42,0.869424+0.079001j,
4,28,0.782045,B,True,2022-01-05,cat2,obj1,0 days 00:00:51,-1.611510+1.248296j,


In [None]:
# Load the NBA roster from nba.csv and take a first look at it
nba = pd.read_csv('nba.csv')

# Only the last expression of a cell is rendered automatically, so the
# intermediate results need an explicit display() (available as a builtin
# inside IPython/Jupyter) or they are silently discarded.
display(nba.head())
nba.info()  # prints its report directly; there is no return value to display
display(nba.describe())

# Length of each player's name; rows with a missing Name yield NaN
nba['Name'].str.len()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 592 entries, 0 to 591
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      591 non-null    object 
 1   Team      591 non-null    object 
 2   Position  584 non-null    object 
 3   Height    585 non-null    object 
 4   Weight    584 non-null    float64
 5   College   578 non-null    object 
 6   Salary    488 non-null    float64
dtypes: float64(2), object(5)
memory usage: 32.5+ KB


0      10.0
1      17.0
2      11.0
3      12.0
4      14.0
       ... 
587    12.0
588    13.0
589    16.0
590    12.0
591     NaN
Name: Name, Length: 592, dtype: float64

In [None]:
# Keep only the players whose Name is at most 15 characters long
nba.loc[nba['Name'].str.len().le(15)]

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
5,Trent Forrest,Atlanta Hawks,G,6-4,210.0,Florida State,508891.0
...,...,...,...,...,...,...,...
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0


In [None]:
# Players whose Weight exceeds 200 (rows with a missing Weight drop out too)
nba_filtered = nba.loc[nba['Weight'].gt(200)]
nba_filtered

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
5,Trent Forrest,Atlanta Hawks,G,6-4,210.0,Florida State,508891.0
...,...,...,...,...,...,...,...
581,Corey Kispert,Washington Wizards,F,6-6,224.0,Gonzaga,3722040.0
582,Kyle Kuzma,Washington Wizards,F,6-9,221.0,Utah,25568182.0
583,Mike Muscala,Washington Wizards,F-C,6-11,240.0,Bucknell,3500000.0
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0


In [21]:
# Replace missing salaries with 0. The result is assigned back to the column:
# calling fillna(inplace=True) on nba['Salary'] is chained assignment, which
# warns on pandas 2.x and leaves nba unchanged under Copy-on-Write.
nba['Salary'] = nba['Salary'].fillna(0)
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556.983
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700.000
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094.244
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616.000
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581.522
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719.864
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250.000
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0.000
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195.122
