In [1]:
import pandas as pd

# Load and Explore the Data
# Read sales_data.csv (path relative to the working directory) into a DataFrame.
sales_data = pd.read_csv('sales_data.csv')

# Preview: show the top five rows of the freshly loaded frame.
print("First 5 rows of the dataset:")
sales_preview = sales_data.head()
print(sales_preview)

# Summary: describe() reports count/mean/std/min/quartiles/max for the
# numerical columns.
print("\nBasic statistics:")
sales_summary = sales_data.describe()
print(sales_summary)

# Data Analysis
# Total sales per region: sum the Sales column within each Region group.
total_sales_by_region = sales_data.groupby('Region')['Sales'].sum()
print("\nTotal sales for each region:")
print(total_sales_by_region)

# Most sold product: the Product whose summed Quantity is the largest.
quantity_by_product = sales_data.groupby('Product')['Quantity'].sum()
most_sold_product = quantity_by_product.idxmax()
print(f"\nMost sold product: {most_sold_product}")

# Average profit margin (in percent) per product.
# A row with Sales == 0 has no defined margin; dividing by it would produce
# inf, and a single inf makes that product's mean inf as well. Mask zero
# sales to NaN so those rows get a NaN margin, which mean() skips.
nonzero_sales = sales_data['Sales'].mask(sales_data['Sales'] == 0)
sales_data['Profit_Margin'] = (sales_data['Profit'] / nonzero_sales) * 100
average_profit_margin_by_product = sales_data.groupby('Product')['Profit_Margin'].mean()
print("\nAverage profit margin for each product:")
print(average_profit_margin_by_product)

# Data Filtering
# Rows whose Sales value is strictly greater than 1000.
is_above_1000 = sales_data['Sales'] > 1000
sales_above_1000 = sales_data.loc[is_above_1000]
print("\nRows where sales are greater than 1000:")
print(sales_above_1000)

# Rows recorded for one specific region (here: "East").
is_east = sales_data['Region'] == 'East'
east_region_sales = sales_data.loc[is_east]
print("\nSales records for the East region:")
print(east_region_sales)

# Data Processing
# Profit_Per_Unit = Profit / Quantity.
# A zero Quantity has no meaningful per-unit profit and would yield inf, so
# zero quantities are masked to NaN and the result for those rows is NaN.
units_sold = sales_data['Quantity'].mask(sales_data['Quantity'] == 0)
sales_data['Profit_Per_Unit'] = sales_data['Profit'] / units_sold

# High_Sales labels each row 'Yes' when Sales > 1000, otherwise 'No'.
# The comparison is evaluated on the whole column at once instead of calling
# a Python lambda per row; a NaN Sales value compares False and is therefore
# labelled 'No', exactly as the row-wise version did.
sales_data['High_Sales'] = (sales_data['Sales'] > 1000).map({True: 'Yes', False: 'No'})

# Assignment 1: Data Exploration
# Load banking_data.csv. The path is relative, so it is resolved against the
# current working directory. When the file is absent, re-raise the same
# exception type (same errno and filename) with a message that says what to
# do about it, instead of the bare "No such file or directory".
try:
    banking_data = pd.read_csv('banking_data.csv')
except FileNotFoundError as err:
    raise FileNotFoundError(
        err.errno,
        "Banking dataset not found; put the file in the current working "
        "directory or run this script from the folder that contains it",
        'banking_data.csv',
    ) from err

# Display the first 5 rows of the dataset
print("\nFirst 5 rows of the banking dataset:")
print(banking_data.head())

# describe() generates basic statistics of the numerical columns
print("\nBasic statistics of numerical columns:")
print(banking_data.describe())

# Count the missing (null/NaN) entries in every column
missing_values = banking_data.isnull().sum()
print("\nMissing values in the dataset:")
print(missing_values)

# Assignment 2: Data Aggregation and Grouping
# --- Per Account_Type ---
# One groupby object is built per key and reused for each aggregate.
by_account_type = banking_data.groupby('Account_Type')

# Sum of Transaction_Amount within each account type
total_transaction_by_account_type = by_account_type['Transaction_Amount'].sum()
print("\nTotal transaction amount by account type:")
print(total_transaction_by_account_type)

# Mean Account_Balance within each account type
average_balance_by_account_type = by_account_type['Account_Balance'].mean()
print("\nAverage account balance by account type:")
print(average_balance_by_account_type)

# --- Per Branch ---
by_branch = banking_data.groupby('Branch')

# Number of rows (one row per transaction) recorded for each branch
total_transactions_per_branch = by_branch.size()
print("\nTotal number of transactions per branch:")
print(total_transactions_per_branch)

# Mean Transaction_Amount within each branch
average_transaction_per_branch = by_branch['Transaction_Amount'].mean()
print("\nAverage transaction amount per branch:")
print(average_transaction_per_branch)


First 5 rows of the dataset:
  Region Product  Quantity  Sales  Profit
0   East       A       100   1000     200
1   West       B       150   1500     300
2  North       C       200   2000     400
3  South       D       250   2500     500

Basic statistics:
         Quantity        Sales      Profit
count    4.000000     4.000000    4.000000
mean   175.000000  1750.000000  350.000000
std     64.549722   645.497224  129.099445
min    100.000000  1000.000000  200.000000
25%    137.500000  1375.000000  275.000000
50%    175.000000  1750.000000  350.000000
75%    212.500000  2125.000000  425.000000
max    250.000000  2500.000000  500.000000

Total sales for each region:
Region
East     1000
North    2000
South    2500
West     1500
Name: Sales, dtype: int64

Most sold product: D

Average profit margin for each product:
Product
A    20.0
B    20.0
C    20.0
D    20.0
Name: Profit_Margin, dtype: float64

Rows where sales are greater than 1000:
  Region Product  Quantity  Sales  Profit  Profi

FileNotFoundError: [Errno 2] No such file or directory: 'banking_data.csv'