# In [None]:
import pandas as pd

# Hand-built sample of user records. It deliberately contains dirty values
# (a missing id, a missing email, an empty signup date and a non-numeric
# purchase amount) so the cleaning steps below have something to handle.
data = {
    'user_id': [101, 102, 103, 104, None],
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'email': [
        'alice@example.com',
        'bob@example.com',
        None,
        'david@example.com',
        'eve@example.com',
    ],
    'signup_date': ['2023-01-01', '2023-02-15', '2023-03-22', '2023-04-10', ''],
    'purchase_amount': ['250.0', '300.5', 'not available', '150.75', '400.0'],
}

df = pd.DataFrame(data)
print("Initial DataFrame:", df, sep="\n")

# Round-trip through CSV: write the raw frame out, then load it again so the
# remaining steps operate on what a reader of the file would see.
df.to_csv('demo_users.csv', index=False)
df = pd.read_csv('demo_users.csv')
print("Read CSV:", df, sep="\n")

# Type conversion. errors='coerce' turns anything unparseable (such as the
# 'not available' amount) into NaT / NaN instead of raising.
df = df.assign(
    signup_date=pd.to_datetime(df['signup_date'], errors='coerce'),
    purchase_amount=pd.to_numeric(df['purchase_amount'], errors='coerce'),
)

# Missing-value handling, one replacement per column:
#   user_id         -> placeholder id 999 (then cast to int)
#   purchase_amount -> mean of the amounts that did parse
#   email           -> placeholder address
df = df.fillna({
    'user_id': 999,
    'purchase_amount': df['purchase_amount'].mean(),
    'email': 'no_email@example.com',
}).astype({'user_id': int})

# Users who spent more than 250. Rows whose amount was imputed with the mean
# take part in this comparison like any other row.
high_spenders = df.loc[df['purchase_amount'].gt(250)]
print("High spenders:", high_spenders, sep="\n")

# Signups per calendar month: derive a monthly period column (appended as the
# last column of df) and count the rows in each group.
df.insert(df.shape[1], 'signup_month', df['signup_date'].dt.to_period('M'))
monthly_signups = df.groupby('signup_month').size()
print("Monthly Signups:", monthly_signups, sep="\n")

# Persist the cleaned frame, including the derived signup_month column.
df.to_csv('cleaned_demo_users.csv', index=False)
