In [None]:
import csv

# Input (raw) and output (prepared) CSV locations, relative to the notebook.
file_path = 'vehicle_emission_dataset.csv'
prepared_file_path = 'vehicle_dataset_clean.csv'

# Columns kept from the raw file, in the order they appear in each row dict.
kolom = [
    'Vehicle Type', 
    'Fuel Type', 
    'Engine Size', 
    'Age of Vehicle', 
    'Mileage',
    'Emission Level'
]

# Integer codes used to label-encode the 'Vehicle Type' column.
vehicle_type_map = {
    'Motorcycle': 1,
    'Car': 2,
    'Bus': 3,
    'Truck': 4
}

# Integer codes used to label-encode the 'Fuel Type' column.
fuel_type_map = {
    'Electric': 1,
    'Hybrid': 2,
    'Petrol': 3,
    'Diesel': 4
}

# Read the raw CSV, keeping only the columns listed in `kolom`.
# newline='' lets the csv module do its own newline handling, so quoted
# fields containing line breaks are parsed correctly (same convention as the
# writer that produces the prepared file).
data = []
with open(file_path, 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Raises KeyError if the file lacks one of the expected columns.
        data.append({key: row[key] for key in kolom})

# Drop every row that has at least one empty/missing value
# (DictReader yields '' for blank fields and None for short rows).
data = [row for row in data if all(row.values())]

data


[{'Vehicle Type': 'Motorcycle',
  'Fuel Type': 'Electric',
  'Engine Size': '2.747608618006285',
  'Age of Vehicle': '22',
  'Mileage': '291288',
  'Emission Level': 'Medium'},
 {'Vehicle Type': 'Bus',
  'Fuel Type': 'Electric',
  'Engine Size': '5.743714393331564',
  'Age of Vehicle': '3',
  'Mileage': '188398',
  'Emission Level': 'Medium'},
 {'Vehicle Type': 'Bus',
  'Fuel Type': 'Hybrid',
  'Engine Size': '4.606368497419306',
  'Age of Vehicle': '17',
  'Mileage': '281451',
  'Emission Level': 'High'},
 {'Vehicle Type': 'Truck',
  'Fuel Type': 'Electric',
  'Engine Size': '3.91302411782459',
  'Age of Vehicle': '4',
  'Mileage': '151321',
  'Emission Level': 'High'},
 {'Vehicle Type': 'Truck',
  'Fuel Type': 'Hybrid',
  'Engine Size': '1.6112969303006701',
  'Age of Vehicle': '15',
  'Mileage': '91810',
  'Emission Level': 'Low'},
 {'Vehicle Type': 'Car',
  'Fuel Type': 'Petrol',
  'Engine Size': '1.6111715057482539',
  'Age of Vehicle': '13',
  'Mileage': '72131',
  'Emission Leve

In [19]:
# label encoding
def label_encode(value, encoding_map):
    """Look up the code for ``value`` in ``encoding_map``.

    Returns the mapped code, or ``None`` when ``value`` is not a key of
    ``encoding_map``.
    """
    if value in encoding_map:
        return encoding_map[value]
    return None

# NOTE: this cell expects `data` to hold the raw rows produced by the loading
# cell (original column names, string values). It rebinds `data` to the
# prepared rows at the end, so re-run the loading cell before re-running this.

# Label-encode the categorical columns into fresh row dicts. Working on copies
# means a failure part-way through does not leave `data` half-transformed.
categorical_maps = {
    'Vehicle Type': vehicle_type_map,
    'Fuel Type': fuel_type_map,
}
encoded_rows = []
for row in data:
    encoded = dict(row)
    for column, mapping in categorical_maps.items():
        code = label_encode(row[column], mapping)
        if code is None:
            # Fail here with a readable message instead of a later float(None).
            raise ValueError(f"Unknown {column!r} value: {row[column]!r}")
        encoded[column] = code
    encoded_rows.append(encoded)

# Min-max normalisation of the numeric and label-encoded columns to [0, 1].
normalisasi_kolom = ['Engine Size', 'Age of Vehicle', 'Mileage', 'Vehicle Type', 'Fuel Type']

if encoded_rows:  # min()/max() would raise on an empty dataset
    for column in normalisasi_kolom:
        col_values = [float(row[column]) for row in encoded_rows]
        min_val, max_val = min(col_values), max(col_values)
        value_range = max_val - min_val
        for row, value in zip(encoded_rows, col_values):
            # A constant column has zero range; map it to 0.0 rather than
            # dividing by zero.
            row[column] = (value - min_val) / value_range if value_range else 0.0

# Rename columns to snake_case, e.g. 'Vehicle Type' -> 'vehicle_type'.
nama_kolom = {col: col.lower().replace(' ', '_') for col in kolom}
data = [
    {nama_kolom[key]: value for key, value in row.items()}
    for row in encoded_rows
]

# Persist the prepared dataset; newline='' is required by the csv module to
# avoid extra blank lines on some platforms.
with open(prepared_file_path, 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=list(nama_kolom.values()))
    writer.writeheader()
    writer.writerows(data)

data

[{'vehicle_type': 0.0,
  'fuel_type': 0.0,
  'engine_size': 0.3746386134092265,
  'age_of_vehicle': 0.7586206896551724,
  'mileage': 0.9713982662179336,
  'emission_level': 'Medium'},
 {'vehicle_type': 0.6666666666666666,
  'fuel_type': 0.0,
  'engine_size': 0.9509822237862713,
  'age_of_vehicle': 0.10344827586206896,
  'mileage': 0.6282108156247186,
  'emission_level': 'Medium'},
 {'vehicle_type': 0.6666666666666666,
  'fuel_type': 0.3333333333333333,
  'engine_size': 0.732197544922115,
  'age_of_vehicle': 0.5862068965517241,
  'mileage': 0.9385871577381448,
  'emission_level': 'High'},
 {'vehicle_type': 1.0,
  'fuel_type': 0.0,
  'engine_size': 0.598822880295374,
  'age_of_vehicle': 0.13793103448275862,
  'mileage': 0.504541254873969,
  'emission_level': 'High'},
 {'vehicle_type': 1.0,
  'fuel_type': 0.3333333333333333,
  'engine_size': 0.15605287922072456,
  'age_of_vehicle': 0.5172413793103449,
  'mileage': 0.3060435546868485,
  'emission_level': 'Low'},
 {'vehicle_type': 0.3333333

In [17]:
import csv

# File path for prepared data
prepared_file_path = 'vehicle_dataset_clean.csv'

# Tally of rows per known Emission Level category.
category_counts = {
    "Low": 0,
    "Medium": 0,
    "High": 0
}
# Rows whose label is not one of the known categories; tracked so that they
# are reported instead of silently disappearing from the totals.
unrecognized_count = 0

# Read the prepared dataset and count emission levels.
# newline='' lets the csv module handle line endings itself, matching how the
# file was written.
with open(prepared_file_path, 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        emission_level = row['emission_level']
        if emission_level in category_counts:
            category_counts[emission_level] += 1
        else:
            unrecognized_count += 1

print("Emission Level Counts:")
for category, count in category_counts.items():
    print(f"{category}: {count}")
# Only printed when something unexpected was found, so the normal output is
# unchanged.
if unrecognized_count:
    print(f"Unrecognized: {unrecognized_count}")


Emission Level Counts:
Low: 2305
Medium: 3274
High: 4421
