In [14]:
import numpy as np
import pandas as pd

In [16]:
# 1. Creating a DataFrame
# Every keyword becomes a column label; its list supplies that column's values,
# one entry per row, so all three lists have the same length.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'Los Angeles', 'Chicago'],
)
df = pd.DataFrame(data)

# Heading and frame are emitted by one call, separated by a newline.
print("DataFrame df:", df, sep="\n")


DataFrame df:
      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [12]:
# 2. Reading a CSV File (assuming 'sample_data.csv' is in the current directory)
# The file is loaded into its own variable, `csv_df`, so that the `df` built in
# step 1 is not overwritten: the later steps index `df` by 'Name', 'Age' and
# 'City', which the CSV data does not contain.
csv_df = pd.read_csv('sample_data.csv')

# Displaying the first few rows of the DataFrame
print("First 5 rows of the DataFrame:")
print(csv_df.head())

First 5 rows of the DataFrame:
      0 1 2 3
0     1 4 5 6
1    2 7 8 9 
2  3 10 11 12
3  4 13 14 14
4  5 16 17 18


In [17]:
# 3. Data Selection
# Each print emits its heading and the selection in one call, newline-separated.

# One column label -> a Series.
print("Selecting a single column 'Name':", df.loc[:, 'Name'], sep="\n")

# A list of labels -> a DataFrame restricted to those columns.
print("Selecting multiple columns 'Name' and 'Age':", df.loc[:, ['Name', 'Age']], sep="\n")

# Positional slice: rows at positions 0 and 1 (the stop position is excluded).
print("Selecting rows by index (first two rows):", df.iloc[:2], sep="\n")

# Boolean mask: keep only the rows where the comparison is True.
print("Selecting rows by condition (Age > 30):", df.loc[df['Age'] > 30], sep="\n")


Selecting a single column 'Name':
0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
Selecting multiple columns 'Name' and 'Age':
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
Selecting rows by index (first two rows):
    Name  Age         City
0  Alice   25     New York
1    Bob   30  Los Angeles
Selecting rows by condition (Age > 30):
      Name  Age     City
2  Charlie   35  Chicago


In [None]:
# 4. Group by 'City' and take the mean of every numeric column.
# The numeric columns are detected from the frame rather than hard-coded: with
# the frame from step 1 that is just 'Age'; 'Salary' is only included if the
# column from step 5 has already been added when this cell runs.
numeric_columns = df.select_dtypes(include='number').columns

# Split the rows by city, then average the numeric columns within each group.
by_city = df.groupby('City')
grouped = by_city[numeric_columns].mean()

print("Grouped and Aggregated DataFrame:", grouped, sep="\n")

Grouped and Aggregated DataFrame:
              Age
City             
Chicago      35.0
Los Angeles  30.0
New York     25.0


In [None]:
# 5. Adding a New Column
# Assigning a list to a label that does not exist yet appends it as a new column.
# The list is matched to rows by position, so it holds one value per row.
# Note: this modifies `df` itself, so every later cell sees the 'Salary' column.
salaries = [70000, 80000, 90000]
df['Salary'] = salaries

print("DataFrame with New Column 'Salary':", df, sep="\n")

DataFrame with New Column 'Salary':
      Name  Age         City  Salary
0    Alice   25     New York   70000
1      Bob   30  Los Angeles   80000
2  Charlie   35      Chicago   90000


In [None]:
# 6. Handling Missing Data
# Work on a separate frame so `df` keeps its original values. 'Salary' (added in
# step 5) is cast to float64 up front: NaN is a float, and writing it into an
# integer column relies on implicit upcasting, which newer pandas releases
# deprecate with a FutureWarning. `astype` returns a new frame, so this also
# takes the place of an explicit `df.copy()`.
df_with_nan = df.astype({'Salary': 'float64'})
df_with_nan.loc[1, 'Salary'] = np.nan

print("DataFrame with NaN value:")
print(df_with_nan)

print("Filling NaN values with 0:")
# Rebind to the filled result instead of using inplace=True; the name ends up
# holding the filled frame either way, but this form stays chainable.
df_with_nan = df_with_nan.fillna(0)
print(df_with_nan)

DataFrame with NaN value:
      Name  Age         City   Salary
0    Alice   25     New York  70000.0
1      Bob   30  Los Angeles      NaN
2  Charlie   35      Chicago  90000.0
Filling NaN values with 0:
      Name  Age         City   Salary
0    Alice   25     New York  70000.0
1      Bob   30  Los Angeles      0.0
2  Charlie   35      Chicago  90000.0


In [None]:
# 7. Merging DataFrames
# A second table keyed by 'Name'. It shares 'Alice' and 'Bob' with the names
# defined in step 1 and introduces 'David'.
data2 = dict(
    Name=['Alice', 'Bob', 'David'],
    Salary=[70000, 80000, 60000],
)
df2 = pd.DataFrame(data2)
print("DataFrame df2:", df2, sep="\n")

# Inner join: only rows whose 'Name' appears in both frames survive.
# When `df` already carries the 'Salary' column from step 5, both sides have a
# 'Salary' label, so pandas tells them apart with suffixes: _x for the left
# frame (df) and _y for the right (df2). These are the defaults, spelled out
# here to make the column names in the result unsurprising.
merged_df = df.merge(df2, on='Name', how='inner', suffixes=('_x', '_y'))
print("Merged DataFrame:", merged_df, sep="\n")

DataFrame df2:
    Name  Salary
0  Alice   70000
1    Bob   80000
2  David   60000
Merged DataFrame:
    Name  Age         City  Salary_x  Salary_y
0  Alice   25     New York     70000     70000
1    Bob   30  Los Angeles     80000     80000
