In [3]:
import pandas as pd


In [5]:
import numpy as np


In [6]:
# Section banner that introduces the Series examples in the cells below.
series_section_banner = "\n==================== 1. Series Creation ===================="
print(series_section_banner)




In [7]:
# Build a Series from a plain Python list. No index is supplied, so the
# values are labelled 0..3 as shown in the printed result.
list_values = [10, 20, 30, 40]
s1 = pd.Series(data=list_values)
print("Series from list:\n", s1)

Series from list:
 0    10
1    20
2    30
3    40
dtype: int64


In [8]:
# Same idea as a list-backed Series, but with explicit string labels
# supplied through the `index` argument instead of the default integers.
custom_labels = ['a', 'b', 'c']
labelled_values = [1, 2, 3]
s2 = pd.Series(data=labelled_values, index=custom_labels)
print("\nSeries with custom index:\n", s2)


Series with custom index:
 a    1
b    2
c    3
dtype: int64


In [13]:
print("\n==================== 2. DataFrame Creation ====================")
# Column-oriented construction: each dict key becomes a column label and the
# list under it supplies that column's values, one per row.
people_columns = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['NY', 'LA', 'Chicago'],
}
df1 = pd.DataFrame(data=people_columns)
print("DataFrame from dictionary:\n", df1)


DataFrame from dictionary:
       Name  Age     City
0    Alice   25       NY
1      Bob   30       LA
2  Charlie   35  Chicago


In [14]:
# Lay the integers 0..8 out as a 3x3 grid, then wrap the grid in a frame
# whose columns are labelled A, B and C.
arr = np.arange(0, 9).reshape((3, 3))
grid_column_labels = ['A', 'B', 'C']
df2 = pd.DataFrame(data=arr, columns=grid_column_labels)
print("\nDataFrame from NumPy array:\n", df2)


DataFrame from NumPy array:
    A  B  C
0  0  1  2
1  3  4  5
2  6  7  8


In [15]:
# Row-oriented construction: every inner list is one row of the frame,
# and `columns` names the two positions within each row.
xy_rows = [
    [1, 2],
    [3, 4],
    [5, 6],
]
df3 = pd.DataFrame(data=xy_rows, columns=['X', 'Y'])
print("\nDataFrame from list of lists:\n", df3)


DataFrame from list of lists:
    X  Y
0  1  2
1  3  4
2  5  6


In [16]:
print("\n==================== 3. Viewing Data ====================")
# Peek at the first and last two rows rather than dumping the whole frame.
print("Head of df1:\n", df1.head(2))
print("Tail of df1:\n", df1.tail(2))
# Structural metadata: column labels, row index and (rows, columns) shape.
print("Columns of df1:", df1.columns)
print("Index of df1:", df1.index)
print("Shape of df1:", df1.shape)
print("Info of df1:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must be called directly: wrapping it in print() emits a stray "None" line
# right after the report.
df1.info()
# describe() summarises the numeric columns only (here: Age).
print("Describe numeric data in df1:\n", df1.describe())



Head of df1:
     Name  Age City
0  Alice   25   NY
1    Bob   30   LA
Tail of df1:
       Name  Age     City
1      Bob   30       LA
2  Charlie   35  Chicago
Columns of df1: Index(['Name', 'Age', 'City'], dtype='object')
Index of df1: RangeIndex(start=0, stop=3, step=1)
Shape of df1: (3, 3)
Info of df1:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   City    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes
None
Describe numeric data in df1:
         Age
count   3.0
mean   30.0
std     5.0
min    25.0
25%    27.5
50%    30.0
75%    32.5
max    35.0


In [17]:
print("\n==================== 4. Selection & Indexing ====================")
# Each entry pairs a caption with the selection it demonstrates. The entries
# are printed in order, so the output matches one print call per example.
selection_examples = (
    # Single column -> Series; list of columns -> DataFrame.
    ("Select 'Name' column:\n", df1['Name']),
    ("Select multiple columns:\n", df1[['Name', 'City']]),
    # .loc addresses rows by index label, .iloc by integer position.
    ("Select row by loc:\n", df1.loc[1]),
    ("Select row by iloc:\n", df1.iloc[0]),
    # Row label plus column label picks out a single cell.
    ("Select specific element:\n", df1.loc[1, 'City']),
)
for caption, selected in selection_examples:
    print(caption, selected)


Select 'Name' column:
 0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
Select multiple columns:
       Name     City
0    Alice       NY
1      Bob       LA
2  Charlie  Chicago
Select row by loc:
 Name    Bob
Age      30
City     LA
Name: 1, dtype: object
Select row by iloc:
 Name    Alice
Age        25
City       NY
Name: 0, dtype: object
Select specific element:
 LA


In [18]:
print("\n==================== 5. Filtering & Boolean Indexing ====================")
# Build each boolean mask on its own line, then use it to keep only the
# rows where the mask is True.
older_than_28 = df1['Age'].gt(28)
lives_in_ny = df1['City'].eq('NY')
print("Rows where Age > 28:\n", df1.loc[older_than_28])
print("Rows where City is 'NY':\n", df1.loc[lives_in_ny])


Rows where Age > 28:
       Name  Age     City
1      Bob   30       LA
2  Charlie   35  Chicago
Rows where City is 'NY':
     Name  Age City
0  Alice   25   NY


In [19]:
print("\n==================== 6. Adding & Modifying Columns ====================")
# NOTE: df1 is extended in place on purpose -- later cells read the
# 'Salary' and 'AgePlus5' columns from this same frame.
salaries = [50000, 60000, 70000]
df1['Salary'] = salaries
print("After adding Salary:\n", df1)
# Derived column: element-wise Age + 5.
age_in_five_years = df1['Age'].add(5)
df1['AgePlus5'] = age_in_five_years
print("After modifying/adding AgePlus5:\n", df1)


After adding Salary:
       Name  Age     City  Salary
0    Alice   25       NY   50000
1      Bob   30       LA   60000
2  Charlie   35  Chicago   70000
After modifying/adding AgePlus5:
       Name  Age     City  Salary  AgePlus5
0    Alice   25       NY   50000        30
1      Bob   30       LA   60000        35
2  Charlie   35  Chicago   70000        40


In [20]:
print("\n==================== 7. Dropping Rows/Columns ====================")
# drop() returns a new frame each time; df1 itself is left untouched, which
# is why the row-drop result below still contains the AgePlus5 column.
df_dropped = df1.drop(columns='AgePlus5')
print("After dropping column AgePlus5:\n", df_dropped)
df_dropped_row = df1.drop(index=0)
print("After dropping row 0:\n", df_dropped_row)


After dropping column AgePlus5:
       Name  Age     City  Salary
0    Alice   25       NY   50000
1      Bob   30       LA   60000
2  Charlie   35  Chicago   70000
After dropping row 0:
       Name  Age     City  Salary  AgePlus5
1      Bob   30       LA   60000        35
2  Charlie   35  Chicago   70000        40


In [21]:
print("\n==================== 8. Handling Missing Data ====================")
# Two columns, each with a single NaN in a different row.
df_missing = pd.DataFrame(
    data={
        'A': [1, np.nan, 3],
        'B': [4, 5, np.nan],
    }
)
print("DataFrame with missing values:\n", df_missing)

# Three non-mutating ways of dealing with the gaps; df_missing stays as built.
rows_without_nan = df_missing.dropna()      # keep only fully populated rows
nan_replaced_by_zero = df_missing.fillna(0)  # substitute 0 for every NaN
nan_mask = df_missing.isna()                 # True where a value is missing
print("Drop rows with NaN:\n", rows_without_nan)
print("Fill NaN with 0:\n", nan_replaced_by_zero)
print("Check for NaN:\n", nan_mask)


DataFrame with missing values:
      A    B
0  1.0  4.0
1  NaN  5.0
2  3.0  NaN
Drop rows with NaN:
      A    B
0  1.0  4.0
Fill NaN with 0:
      A    B
0  1.0  4.0
1  0.0  5.0
2  3.0  0.0
Check for NaN:
        A      B
0  False  False
1   True  False
2  False   True


In [22]:
print("\n==================== 9. Aggregation Functions ====================")
# Select the two numeric columns once and reuse the selection for every
# column-wise reduction below.
age_and_salary = df1[['Age', 'Salary']]
print("Sum of numeric columns:\n", age_and_salary.sum())
print("Mean of numeric columns:\n", age_and_salary.mean())
print("Max of numeric columns:\n", age_and_salary.max())
print("Min of numeric columns:\n", age_and_salary.min())
print("Describe:\n", age_and_salary.describe())


Sum of numeric columns:
 Age           90
Salary    180000
dtype: int64
Mean of numeric columns:
 Age          30.0
Salary    60000.0
dtype: float64
Max of numeric columns:
 Age          35
Salary    70000
dtype: int64
Min of numeric columns:
 Age          25
Salary    50000
dtype: int64
Describe:
         Age   Salary
count   3.0      3.0
mean   30.0  60000.0
std     5.0  10000.0
min    25.0  50000.0
25%    27.5  55000.0
50%    30.0  60000.0
75%    32.5  65000.0
max    35.0  70000.0


In [23]:
print("\n==================== 10. Grouping & Aggregation ====================")
# Five employees spread over two departments.
employee_records = {
    'Department': ['HR', 'IT', 'HR', 'IT', 'IT'],
    'Employee': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Salary': [50000, 60000, 55000, 65000, 70000],
}
df_group = pd.DataFrame(data=employee_records)
print("Original DataFrame:\n", df_group)
# Split the Salary column by Department, then average within each group.
salary_by_department = df_group.groupby('Department')['Salary']
grouped = salary_by_department.mean()
print("Average salary by Department:\n", grouped)


Original DataFrame:
   Department Employee  Salary
0         HR    Alice   50000
1         IT      Bob   60000
2         HR  Charlie   55000
3         IT    David   65000
4         IT      Eve   70000
Average salary by Department:
 Department
HR    52500.0
IT    65000.0
Name: Salary, dtype: float64


In [24]:
print("\n==================== 11. Sorting ====================")
# sort_values() returns a reordered copy; the original row labels travel
# with their rows, and df1 itself keeps its order.
youngest_first = df1.sort_values(by='Age', ascending=True)
print("Sort by Age ascending:\n", youngest_first)
highest_paid_first = df1.sort_values(by='Salary', ascending=False)
print("Sort by Salary descending:\n", highest_paid_first)


Sort by Age ascending:
       Name  Age     City  Salary  AgePlus5
0    Alice   25       NY   50000        30
1      Bob   30       LA   60000        35
2  Charlie   35  Chicago   70000        40
Sort by Salary descending:
       Name  Age     City  Salary  AgePlus5
2  Charlie   35  Chicago   70000        40
1      Bob   30       LA   60000        35
0    Alice   25       NY   50000        30


In [25]:
print("\n==================== 12. Merging / Concatenation ====================")
# Two small frames that share the 'ID' column; IDs 2 and 3 appear in both.
df_a = pd.DataFrame(data={'ID': [1, 2, 3], 'Name': ['A', 'B', 'C']})
df_b = pd.DataFrame(data={'ID': [2, 3, 4], 'Score': [90, 80, 70]})

# Outer join on ID: every ID from either side is kept, and the column that
# has no match on the other side is filled with NaN.
merged_df = df_a.merge(df_b, on='ID', how='outer')
print("Merged DataFrame (outer join):\n", merged_df)

# Row-wise stacking: df_b's rows are appended under df_a's, the index is
# renumbered, and columns missing from one input become NaN.
concat_df = pd.concat(objs=[df_a, df_b], ignore_index=True)
print("Concatenated DataFrame:\n", concat_df)


Merged DataFrame (outer join):
    ID Name  Score
0   1    A    NaN
1   2    B   90.0
2   3    C   80.0
3   4  NaN   70.0
Concatenated DataFrame:
    ID Name  Score
0   1    A    NaN
1   2    B    NaN
2   3    C    NaN
3   2  NaN   90.0
4   3  NaN   80.0
5   4  NaN   70.0


In [26]:
print("\n==================== 13. File I/O ====================")
# Round-trip df1 through a CSV file in the current working directory.
csv_path = 'df1.csv'

# Write without the row index so the file holds only the data columns.
df1.to_csv(csv_path, index=False)
print("DataFrame saved to 'df1.csv'")

# Read the same file back into a fresh frame.
df_loaded = pd.read_csv(csv_path)
print("Loaded DataFrame:\n", df_loaded)

print("\nAll major Pandas functions executed successfully!")


DataFrame saved to 'df1.csv'
Loaded DataFrame:
       Name  Age     City  Salary  AgePlus5
0    Alice   25       NY   50000        30
1      Bob   30       LA   60000        35
2  Charlie   35  Chicago   70000        40

All major Pandas functions executed successfully!
