In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

# Root of the project's data tree. Set the ECOPACK_DATA_DIR environment
# variable to point somewhere else on another machine; the default is the
# original local path, so existing setups keep working unchanged.
DATA_DIR = Path(os.environ.get("ECOPACK_DATA_DIR", "E:/Data Science/EcoPackAI/data"))
RAW_DIR = DATA_DIR / "raw"

# Raw inputs for the cleaning steps that follow.
materials = pd.read_csv(RAW_DIR / "materials.csv")
products = pd.read_csv(RAW_DIR / "products.csv")

In [2]:
# Text columns of `products` that get leading/trailing whitespace trimmed.
str_cols = [
    "product_id",
    "product_name",
    "product_category",
    "fragility_level",
    "temperature_sensitivity",
    "moisture_sensitivity",
    "packaging_format",
    "current_packaging_material",
]
# Trim all listed columns in a single assignment via the .str accessor.
products[str_cols] = products[str_cols].apply(lambda text_col: text_col.str.strip())

In [3]:
# Convert the label-like text columns to Title Case so that differently-cased
# spellings of the same label compare equal.
for label_col in (
    "fragility_level",
    "temperature_sensitivity",
    "moisture_sensitivity",
    "product_category",
):
    products[label_col] = products[label_col].str.title()

In [4]:
# Rename the CO2 emission column to `co2_emission_per_kg`; rebinding the
# result keeps the cell safe to re-run (a missing source name is ignored).
materials = materials.rename(columns={"co2_emission_kg_per_kg": "co2_emission_per_kg"})

In [5]:
# Display the column index to confirm that `co2_emission_per_kg` is present
# after the rename.
materials.columns

Index(['material_id', 'material_type', 'strength_mpa', 'weight_capacity',
       'co2_emission_per_kg', 'biodegradability_score', 'recyclability_pct',
       'cost_inr_per_kg', 'material_category'],
      dtype='object')

In [9]:
def flag_outliers(series, lower_q=0.01, upper_q=0.99):
    """Mark values lying strictly outside a quantile band.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to screen.
    lower_q, upper_q : float
        Quantiles (fractions between 0 and 1) that bound the accepted band.
        The defaults flag roughly the lowest and highest 1% of the values.

    Returns
    -------
    pandas.Series of bool
        True where a value is below the ``lower_q`` quantile or above the
        ``upper_q`` quantile. Values equal to a bound are not flagged, and
        missing values compare False on both sides, so they are not flagged.
    """
    bounds = series.quantile([lower_q, upper_q])
    lower_bound, upper_bound = bounds.iloc[0], bounds.iloc[1]
    below = series.lt(lower_bound)
    above = series.gt(upper_bound)
    return below | above

# Flag column to create -> numeric column it is derived from.
outlier_flag_sources = {
    "flag_weight_outlier": "product_weight_g",
    "flag_volume_outlier": "product_volume_cm3",
    "flag_price_outlier": "price_inr",
}
# Outliers are only flagged here, not removed; the default 1%/99% band applies.
for flag_col, source_col in outlier_flag_sources.items():
    products[flag_col] = flag_outliers(products[source_col])

In [None]:
# Count flagged vs. unflagged products for weight.
# NOTE(review): this cell has no execution count and the output saved beneath
# it is labelled `flag_price_outlier`, not `flag_weight_outlier` — re-run the
# cell so the stored output matches the code.
products["flag_weight_outlier"].value_counts()

flag_price_outlier
False    453
True       4
Name: count, dtype: int64

In [13]:
# Count flagged vs. unflagged products for volume.
products["flag_volume_outlier"].value_counts()

flag_volume_outlier
False    447
True      10
Name: count, dtype: int64

In [14]:
# Count flagged vs. unflagged products for price.
products["flag_price_outlier"].value_counts()

flag_price_outlier
False    453
True       4
Name: count, dtype: int64

In [15]:
# Preview the first few weight outliers to sanity-check what gets flagged.
weight_outlier_rows = products[products["flag_weight_outlier"]]
weight_outlier_rows[["product_name", "product_weight_g"]].head(5)

Unnamed: 0,product_name,product_weight_g
53,Lakmé Lipstick 5 g,5.0
148,Everest Rice 10000 g,10000.0
171,Lakmé Lipstick 5 g v2,5.0
342,Saffola Flour 10000 g,10000.0
367,Fortune Rice 10000 g,10000.0


In [17]:
# Cleaned tables go under <data root>/processed. The root can be overridden
# with the ECOPACK_DATA_DIR environment variable; the default is the original
# local path, so existing setups keep working unchanged.
processed_dir = Path(os.environ.get("ECOPACK_DATA_DIR", "E:/Data Science/EcoPackAI/data")) / "processed"
# to_csv does not create missing directories, so make sure the target exists.
processed_dir.mkdir(parents=True, exist_ok=True)

materials.to_csv(processed_dir / "materials_cleaned.csv", index=False)
products.to_csv(processed_dir / "products_cleaned.csv", index=False)

print("Data Cleaning completed successfully.")

Data Cleaning completed successfully.


In [3]:
# Install the PostgreSQL driver into the kernel's own environment (%pip rather
# than a shell pip), pinned to the version this notebook was run with so that
# a re-run resolves the same package; -q keeps the download log out of the output.
%pip install -q psycopg2==2.9.11

Collecting psycopg2
  Downloading psycopg2-2.9.11-cp312-cp312-win_amd64.whl.metadata (5.1 kB)
Downloading psycopg2-2.9.11-cp312-cp312-win_amd64.whl (2.7 MB)
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
    --------------------------------------- 0.0/2.7 MB 487.6 kB/s eta 0:00:06
   ----- ---------------------------------- 0.4/2.7 MB 3.5 MB/s eta 0:00:01
   ---------------- ----------------------- 1.1/2.7 MB 7.2 MB/s eta 0:00:01
   --------------------------- ------------ 1.9/2.7 MB 9.2 MB/s eta 0:00:01
   ---------------------------------------  2.7/2.7 MB 10.6 MB/s eta 0:00:01
   ---------------------------------------- 2.7/2.7 MB 9.6 MB/s eta 0:00:00
Installing collected packages: psycopg2
Successfully installed psycopg2-2.9.11
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [10]:
import getpass
import os

import psycopg2

# Connection settings are read from the environment so that no credential is
# stored in the notebook. The non-secret defaults match the local setup.
# NOTE: a password was previously hardcoded in this cell; treat it as exposed
# and rotate it.
conn = psycopg2.connect(
    dbname=os.environ.get("PGDATABASE", "ecopack_ai"),
    user=os.environ.get("PGUSER", "postgres"),
    # Fall back to an interactive prompt when PGPASSWORD is not set.
    password=os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: "),
    host=os.environ.get("PGHOST", "localhost"),
    port=os.environ.get("PGPORT", "5432"),
)

print("Database Connected")

Database Connected


In [17]:
# Open a cursor on the existing connection; the next query cell reuses it
# under the same name, so `query` must stay bound.
query = conn.cursor()

# Sanity check: number of rows in the materials table.
query.execute("SELECT COUNT(*) FROM materials;")
material_count_rows = query.fetchall()
print(material_count_rows)

[(22,)]


In [18]:
# Fetch a small sample of product rows to eyeball the loaded table.
# NOTE(review): the saved output shows 'NestlÃ©', which looks like UTF-8 text
# decoded with a single-byte encoding — verify the encoding used when the
# table was loaded (or the connection's client encoding).
query.execute("SELECT * FROM products LIMIT 5;")
rows = query.fetchall()

for product_row in rows:
    print(product_row)

('PRD0001', 'Dabur Body Wash 400 ml v2', 'Personal Care', Decimal('400.00'), Decimal('422.27'), Decimal('221.40'), 'Low', 'Low', 'Medium', 720, 'Pouch', 'LDPE Plastic')
('PRD0002', 'NestlÃ© Namkeen 200 g', 'Food', Decimal('200.00'), Decimal('410.27'), Decimal('47.69'), 'Low', 'Low', 'Medium', 240, 'Wrapper', 'LDPE Plastic')
('PRD0003', 'Sugar Cosmetics Cream 100 g', 'Cosmetics', Decimal('100.00'), Decimal('161.27'), Decimal('120.50'), 'Low', 'Low', 'Medium', 720, 'Tube', 'HDPE Plastic')
('PRD0004', 'Patanjali Rice 5000 g', 'Grocery', Decimal('5000.00'), Decimal('8920.73'), Decimal('324.28'), 'Low', 'Low', 'Low', 365, 'Bag', 'Kraft Paper (Unbleached)')
('PRD0005', 'Milky Mist Buttermilk 500 ml', 'Dairy', Decimal('500.00'), Decimal('571.71'), Decimal('22.94'), 'Medium', 'High', 'High', 7, 'Pouch', 'HDPE Plastic')
