In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import os

In [3]:
# Input-data locations, expressed relative to the notebook's working directory.
RESOURCES_DIR = Path("..") / "resources"
TASK1_DIR = RESOURCES_DIR.joinpath("task_1")
TASK2_DIR = RESOURCES_DIR.joinpath("task_2")

In [4]:
# Step 1: Check if data files exist
# Build the expected locations of both supplier workbooks inside TASK1_DIR and
# report whether each file is present before any attempt is made to read it.
supplier1_path = TASK1_DIR / "supplier_data1.xlsx"
supplier2_path = TASK1_DIR / "supplier_data2.xlsx"

# Plain string literal: nothing is interpolated here, so no f-prefix is needed.
print("Checking for data files...")
print(f"Looking in: {TASK1_DIR}")
print(f"supplier_data1.xlsx exists: {supplier1_path.exists()}")
print(f"supplier_data2.xlsx exists: {supplier2_path.exists()}")

Checking for data files...
Looking in: ../resources/task_1
supplier_data1.xlsx exists: True
supplier_data2.xlsx exists: True


In [5]:
# Step 2: Load and examine supplier_data1.xlsx

# Read the first supplier workbook into a DataFrame, then preview its top rows
# as the cell's displayed result.
supplier1_df = pd.read_excel(io=supplier1_path)
supplier1_df.head(n=5)

Unnamed: 0,Quality/Choice,Grade,Finish,Thickness (mm),Width (mm),Description,Gross weight (kg),RP02,RM,Quantity,AG,AI
0,3rd,C200S,gebeizt und geglüht,2.77,1100,Längs- oder Querisse,13983,333.6,606.2,0.0,16.11,0.0054
1,3rd,C300S,ungebeizt,2.65,1075,Längs- oder Querisse,13047,717.7,0.0,0.0,16.11,0.0046
2,3rd,C100S,gebeizt und geglüht,2.2,1100,Kantenfehler - FS-Kantenrisse,14155,368.9,0.0,10.84,0.0,0.0061
3,2nd,C100S,gebeizt,2.86,1100,Längs- oder Querisse,11381,368.9,601.7,22.87,0.0,0.0062
4,1st,C300S,ungebeizt,2.88,1050,Sollmasse (Gewicht) unterschritten,10072,0.0,1213.0,22.87,0.0,0.0041


In [6]:
# Report the (rows, columns) dimensions of the first supplier table.
print("Shape: {}".format(supplier1_df.shape))

Shape: (50, 12)


In [7]:
# List the dtype pandas assigned to every column of the first supplier table.
# The leading "\n" keeps a blank line above the heading in the cell output.
# Plain string literal: nothing is interpolated, so no f-prefix is needed.
print("\nData types:")
print(supplier1_df.dtypes)


Data types:
Quality/Choice        object
Grade                 object
Finish                object
Thickness (mm)       float64
Width (mm)             int64
Description           object
Gross weight (kg)      int64
RP02                 float64
RM                   float64
Quantity             float64
AG                   float64
AI                   float64
dtype: object


In [8]:
# Count null entries per column of the first supplier table.
# The leading "\n" keeps a blank line above the heading in the cell output.
# Plain string literal: nothing is interpolated, so no f-prefix is needed.
print("\nMissing values:")
print(supplier1_df.isnull().sum())


Missing values:
Quality/Choice       0
Grade                0
Finish               0
Thickness (mm)       0
Width (mm)           0
Description          0
Gross weight (kg)    0
RP02                 0
RM                   0
Quantity             0
AG                   0
AI                   0
dtype: int64


In [10]:
# Step 3: Load and examine supplier_data2.xlsx

# Read the second supplier workbook into a DataFrame, then preview its top rows
# as the cell's displayed result.
supplier2_df = pd.read_excel(io=supplier2_path)
supplier2_df.head(n=5)

Unnamed: 0,Material,Description,Article ID,Weight (kg),Quantity,Reserved
0,HDC,Material is Oiled,23048203,24469,52,NOT RESERVED
1,S235JR,Material is Oiled,23040547,16984,41,NOT RESERVED
2,S235JR,Material is Painted,23046057,9162,28,NOT RESERVED
3,DX51D +AZ150,Material is Oiled,23041966,12119,66,VANILLA
4,HDC,Material is Painted,23043884,17260,26,NOT RESERVED


In [11]:
# Report the (rows, columns) dimensions of the second supplier table.
print("Shape: {}".format(supplier2_df.shape))

Shape: (50, 6)


In [12]:
# List the dtype pandas assigned to every column of the second supplier table.
# The leading "\n" keeps a blank line above the heading in the cell output.
# Plain string literal: nothing is interpolated, so no f-prefix is needed.
print("\nData types:")
print(supplier2_df.dtypes)


Data types:
Material       object
Description    object
Article ID      int64
Weight (kg)     int64
Quantity        int64
Reserved       object
dtype: object


In [13]:
# Count null entries per column of the second supplier table.
# The leading "\n" keeps a blank line above the heading in the cell output.
# Plain string literal: nothing is interpolated, so no f-prefix is needed.
print("\nMissing values:")
print(supplier2_df.isnull().sum())


Missing values:
Material       0
Description    0
Article ID     0
Weight (kg)    0
Quantity       0
Reserved       0
dtype: int64
