# Data Preparation for Model Training

This notebook prepares the XOR logic gate dataset for model training.

## 1. Import Libraries

In [None]:
import pandas as pd

## 2. Load Data

In [None]:
# Read the raw CSV into a DataFrame and report its (rows, columns) shape
raw_data_path = '../data/raw/input.csv'
df = pd.read_csv(raw_data_path)
print(f"Dataset shape: {df.shape}")
# Bare expression: rendered as the cell's output
df

## 3. Explore Data

### Data Types (df.dtypes)

In [None]:
# Show the dtype pandas assigned to each column when the CSV was loaded
df.dtypes

### Data Info (df.info())

In [None]:
# Print a concise summary: index range, column names, non-null counts,
# dtypes and memory usage
df.info()

### Check for Missing Values

In [None]:
# Check for missing values.
# Count the missing entries per column once and reuse the result for both
# the per-column report and the overall total, instead of rebuilding the
# null mask a second time.
missing_per_column = df.isnull().sum()
print("Missing Values:")
print(missing_per_column)

print(f"\nTotal missing values: {missing_per_column.sum()}")

### Statistical summary

In [None]:
# Descriptive statistics per column (by default count, mean, std, min,
# quartiles and max for numeric columns)
df.describe()

## 4. Transform Data

### 4.1 Convert fields to float

In [None]:
# Cast every column to float (float64 in pandas). astype raises if a column
# holds values that cannot be converted, so bad data surfaces here.
df = df.astype(float)

## 5. Data Summary

### Data Types

In [None]:
# Re-check the column dtypes after the float conversion
df.dtypes

### Data Info

In [None]:
# Print the summary again (non-null counts, dtypes, memory usage) for the
# transformed DataFrame
df.info()

### DataFrame Contents (df)

In [None]:
# Bare expression: renders the prepared DataFrame as the cell's output
df

## 6. Save Prepared Data

### Prepare Output Directory

In [None]:
import os
# Destination for the prepared files; the relative path is resolved against
# the current working directory
output_dir = '../data/processed'
# Creates missing parent directories too; exist_ok=True makes re-running
# this cell safe when the directory already exists
os.makedirs(output_dir, exist_ok=True)

### Save as CSV

In [None]:
# Write the prepared DataFrame to <output_dir>/prepared_data.csv
csv_path = os.path.join(output_dir, 'prepared_data.csv')
# index=False keeps the DataFrame index out of the file
df.to_csv(csv_path, index=False)
print(f"✓ Saved CSV: {csv_path}")

### Save as Parquet

In [None]:
# Write the prepared DataFrame to <output_dir>/prepared_data.parquet
parquet_path = os.path.join(output_dir, 'prepared_data.parquet')
# NOTE: to_parquet needs an optional parquet engine (pyarrow or fastparquet)
# installed; index=False keeps the DataFrame index out of the file
df.to_parquet(parquet_path, index=False)
print(f"✓ Saved Parquet: {parquet_path}")

### Verify saved files

In [None]:
# List every entry in the output directory together with its size in bytes.
# os.listdir() returns names in arbitrary order, so sort them to keep the
# listing stable across runs and filesystems.
print("Saved files:")
for f in sorted(os.listdir(output_dir)):
    filepath = os.path.join(output_dir, f)
    size = os.path.getsize(filepath)
    print(f"  - {f} ({size} bytes)")