In [1]:
# 📌 Getting Started with Pandas
import pandas as pd

# Create a sample DataFrame manually.
# 'Age' contains one missing value (None) and the first and last rows are
# identical, so the cleaning steps below have something to act on.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Alice'],
    'Age': [25, 30, None, 35, 29, 25],
    'City': ['New York', 'Los Angeles', 'New York', 'Chicago', 'Chicago', 'New York'],
}
df = pd.DataFrame(data)

# Display the first 5 rows
print("🔹 First 5 Rows:")
print(df.head())

# Get concise summary info.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("\n🔹 DataFrame Info:")
df.info()

# 📌 Data Inspection & Selection

# Inspect data types of each column
print("\n🔹 Column Data Types:")
print(df.dtypes)

# Select a single column, e.g., 'Age'
print("\n🔹 Selected Column - Age:")
print(df['Age'])

# Slice specific rows by position (rows 2 to 5; the end bound 6 is exclusive)
print("\n🔹 Sliced Rows (2 to 5):")
print(df.iloc[2:6])

# 📌 Data Cleaning & Manipulation

# Fill missing values in 'Age' with the column mean.
# NOTE: the mean is taken before duplicates are dropped, so the repeated
# 'Alice' row contributes twice to it.
df['Age'] = df['Age'].fillna(df['Age'].mean())

# Rename columns
df = df.rename(columns={'Age': 'Age (Years)', 'Name': 'Full Name'})

# Remove duplicate rows (keeps the first occurrence of each)
df = df.drop_duplicates()

# 📌 Data Aggregation & Exporting

# Group by 'City' and calculate mean of 'Age (Years)'
grouped_data = df.groupby('City')['Age (Years)'].mean()
print("\n🔹 Grouped by City - Mean Age:")
print(grouped_data)

# Export cleaned DataFrame to a new CSV (index=False omits the row index)
df.to_csv('cleaned_data.csv', index=False)

# Group by 'City' and apply multiple aggregations
aggregated_data = df.groupby('City')['Age (Years)'].agg(['mean', 'sum', 'count'])
print("\n🔹 Aggregated Data (mean, sum, count) by City:")
print(aggregated_data)


🔹 First 5 Rows:
      Name   Age         City
0    Alice  25.0     New York
1      Bob  30.0  Los Angeles
2  Charlie   NaN     New York
3    David  35.0      Chicago
4      Eve  29.0      Chicago

🔹 DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    6 non-null      object 
 1   Age     5 non-null      float64
 2   City    6 non-null      object 
dtypes: float64(1), object(2)
memory usage: 272.0+ bytes
None

🔹 Column Data Types:
Name     object
Age     float64
City     object
dtype: object

🔹 Selected Column - Age:
0    25.0
1    30.0
2     NaN
3    35.0
4    29.0
5    25.0
Name: Age, dtype: float64

🔹 Sliced Rows (2 to 5):
      Name   Age      City
2  Charlie   NaN  New York
3    David  35.0   Chicago
4      Eve  29.0   Chicago
5    Alice  25.0  New York

🔹 Grouped by City - Mean Age:
City
Chicago        32.0
Los Ange