In [37]:
import pandas as pd


# 2. Create a Pandas DataFrame (from lists/dictionaries)

In [38]:
data = {
    'Customer Name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Ethan'],
    'Product': ['Laptop', 'Tablet', 'Smartphone', 'Laptop', 'Headphones'],
    'Amount': [1200, 800, 600, 1300, 150]
}

df = pd.DataFrame(data)
print("Initial DataFrame:")
print(df)


Initial DataFrame:
  Customer Name     Product  Amount
0         Alice      Laptop    1200
1           Bob      Tablet     800
2       Charlie  Smartphone     600
3         Diana      Laptop    1300
4         Ethan  Headphones     150


# 3. Load Data from an External CSV File

In [39]:
df = pd.read_csv('customer_purchases.csv')


# . Check the First and Last Few Rows of the DataFrame

In [40]:
print("First 5 rows:")
print(df.head())

print("\nLast 5 rows:")
print(df.tail())


First 5 rows:
     Abi   apple  1000
0  Reshi  Grapes  2000

Last 5 rows:
     Abi   apple  1000
0  Reshi  Grapes  2000


# 5. Retrieve DataFrame Properties

In [41]:
print("Shape of the DataFrame (rows, columns):", df.shape)

print("\nInfo about DataFrame:")
df.info()


Shape of the DataFrame (rows, columns): (1, 3)

Info about DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Abi     1 non-null      object
 1   apple   1 non-null      object
 2   1000    1 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 156.0+ bytes


# . Get Summary Statistics

In [42]:
print("Summary Statistics:")
print(df.describe())


Summary Statistics:
         1000
count     1.0
mean   2000.0
std       NaN
min    2000.0
25%    2000.0
50%    2000.0
75%    2000.0
max    2000.0


# 7. Access and Modify Specific Columns

In [43]:
print(df.head())     # View first 5 rows
print(df.columns)  


     Abi   apple  1000
0  Reshi  Grapes  2000
Index(['Abi', 'apple', '1000'], dtype='object')


# 8. Filter Customer Data Based on Conditions

In [44]:
print(df.columns.tolist())



['Abi', 'apple', '1000']


# 9. Sort the Dataset

In [45]:
sorted_df = df.sort_values(by='Amount', ascending=False)
print("Sorted by Amount (desc):")
print(sorted_df)


KeyError: 'Amount'

# 10. Handle Missing Values

# Detect missing values:

In [None]:
print("Missing values per column:")
print(df.isnull().sum())


# 11. Group and Aggregate Data

# Total amount spent by each custome

In [None]:
total_by_customer = df.groupby('Customer Name')['Amount'].sum()
print("Total spent per customer:")
print(total_by_customer)


# 12. Add a New Calculated Column

# Add a 10% discount column for purchases above $1000:

In [None]:
df['Discount'] = df['Amount'].apply(lambda x: x * 0.10 if x > 1000 else 0)


# 13. Save Cleaned Data to a New CSV File

In [None]:
df.to_csv('cleaned_customer_purchases.csv', index=False)
