PCI range|Class<br>
86-100|Good<br>
71-85|Satisfactory<br>
56-70|Fair<br>
41-55|Poor<br>
26-40|Very Poor<br>
11-25|Serious<br>
0-10|Failed<br>

In [None]:
#random data gen ><
import pandas as pd
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Generate data
n_samples = 50
data = {
    'road_id': range(1, n_samples + 1),
    'age_years': np.random.randint(1, 30, n_samples),
    'traffic_volume_daily': np.random.randint(1000, 50000, n_samples),
    'climate_zone': np.random.choice(['Arid', 'Temperate', 'Tropical', 'Continental'], n_samples),
    'maintenance_frequency_years': np.random.randint(1, 10, n_samples),
    'last_repair_years_ago': np.random.randint(0, 15, n_samples),
    'surface_type': np.random.choice(['Asphalt', 'Concrete', 'Composite'], n_samples),
    'thickness_mm': np.random.randint(50, 300, n_samples),
    'subgrade_quality': np.random.choice(['Poor', 'Fair', 'Good', 'Excellent'], n_samples),
    'drainage_quality': np.random.choice(['Poor', 'Fair', 'Good', 'Excellent'], n_samples)
}

# Create DataFrame
df = pd.DataFrame(data)

# Calculate PCI based on the features (this is a simplified model for demonstration)
df['pci'] = 100 - (
    df['age_years'] * 1.5 +
    df['traffic_volume_daily'] * 0.0005 +
    (df['maintenance_frequency_years'] - 5) * 2 +
    df['last_repair_years_ago'] * 3 -
    (df['thickness_mm'] - 150) * 0.1 +
    np.random.normal(0, 5, n_samples)  # Add some random variation
)

# Adjust PCI based on categorical variables
df.loc[df['climate_zone'] == 'Arid', 'pci'] += 5
df.loc[df['climate_zone'] == 'Tropical', 'pci'] -= 5
df.loc[df['surface_type'] == 'Concrete', 'pci'] += 3
df.loc[df['subgrade_quality'] == 'Excellent', 'pci'] += 5
df.loc[df['subgrade_quality'] == 'Poor', 'pci'] -= 5
df.loc[df['drainage_quality'] == 'Excellent', 'pci'] += 5
df.loc[df['drainage_quality'] == 'Poor', 'pci'] -= 5

# Ensure PCI is within 0-100 range
df['pci'] = np.clip(df['pci'], 0, 100).round(2)

# Display the first few rows of the dataset
print(df.head())

# Save the dataset to a CSV file
df.to_csv('pci_dataset.csv', index=False)
print("\nDataset saved as 'pci_dataset.csv'")

# DATASET KEY

PCI (Pavement Condition Index): Values such as 20, 12, 41.4, and 43.6 for different road sections.<br>
Rutting (mm): Values like 4.80, 4.4, 8.7, and 8.8.<br>
Fatigue Cracking (m²): Measurements including 2.00, 1.5, 2.1, and 2.4.<br>
Block Cracking (m²): Values include 0.0, 0.0, 0.9, and 0.6.<br>
Longitudinal Cracking (m²): Recorded as 26.0, 47.5, 16.9, and 18.8.<br>
Transverse Cracking (m²): Values like 0.0, 0.0, 5.0, and 3.2.<br>
Patching (m²): Recorded as 118.0, 96.0, 5.2, and 3.8.<br>
Potholes (Number): Count of potholes such as 5, 41, 63, and 51.<br>
Delamination (m²): Measurements including 18.0, 13.5, 5.5, and 5.7.<br>
Severity Rating: All set to "medium" as an example; these can be updated based on more specific data.<br>
Traffic Volume: Example values such as 5000, 10000, 15000, and 12000 vehicles/day.<br>
Temperature (°C): Example temperatures like 22.0, 24.0, 20.0, and 21.0.<br>
Precipitation (mm): Precipitation values such as 75.0, 80.0, 100.0, and 85.0.<br>
Maintenance History: Example maintenance history as 'yes' or 'no'.<br>

In [4]:
#24 lines dataset

import pandas as pd

# Extended dataset with 24 rows
data = {
    'PCI': [20, 12, 41.4, 43.6, 50, 60, 30, 70, 55, 65, 40, 45, 80, 85, 35, 25, 90, 75, 52, 62, 48, 58, 38, 28],
    'Rutting_mm': [4.80, 4.4, 8.7, 8.8, 5.5, 6.0, 7.8, 3.0, 9.2, 4.3, 6.1, 7.4, 2.2, 2.5, 8.0, 9.1, 2.0, 4.8, 6.3, 7.0, 5.2, 6.8, 7.5, 8.1],
    'Fatigue_Cracking_m2': [2.00, 1.5, 2.1, 2.4, 3.0, 3.5, 4.0, 1.0, 4.5, 1.2, 2.5, 3.8, 0.5, 0.3, 2.8, 3.2, 0.2, 2.9, 3.4, 4.1, 2.3, 3.6, 4.2, 3.9],
    'Block_Cracking_m2': [0.0, 0.0, 0.9, 0.6, 1.0, 0.5, 2.0, 0.4, 1.5, 0.8, 0.3, 1.2, 0.0, 0.1, 1.0, 1.3, 0.2, 0.7, 1.4, 2.1, 0.6, 0.9, 1.6, 1.8],
    'Longitudinal_Cracking_m2': [26.0, 47.5, 16.9, 18.8, 25.0, 30.0, 20.0, 15.0, 28.0, 22.0, 35.0, 40.0, 10.0, 8.0, 32.0, 45.0, 12.0, 18.0, 38.0, 42.0, 25.5, 29.0, 31.0, 36.0],
    'Transverse_Cracking_m2': [0.0, 0.0, 5.0, 3.2, 4.0, 4.5, 2.0, 1.5, 6.0, 2.2, 3.0, 3.5, 1.0, 0.8, 4.8, 5.2, 1.3, 3.1, 4.2, 4.9, 2.5, 3.6, 4.4, 5.0],
    'Patching_m2': [118.0, 96.0, 5.2, 3.8, 10.0, 8.0, 15.0, 7.0, 20.0, 9.0, 12.0, 6.5, 4.0, 5.0, 11.0, 14.0, 2.5, 8.5, 13.0, 9.5, 6.0, 7.5, 10.5, 12.5],
    'Potholes_Num': [5, 41, 63.0, 51.0, 30, 20, 40, 10, 35, 15, 25, 45, 5, 7, 55, 60, 3, 18, 22, 38, 12, 28, 33, 42],
    'Delamination_m2': [18.0, 13.5, 5.5, 5.7, 7.0, 6.5, 8.5, 4.5, 9.0, 4.0, 6.0, 5.8, 3.0, 3.5, 7.5, 8.0, 2.0, 5.6, 6.7, 7.8, 4.2, 5.9, 7.2, 6.9],
    'Severity_Rating': ['medium', 'medium', 'medium', 'medium', 'high', 'high', 'high', 'low', 'high', 'low', 'medium', 'high', 'low', 'low', 'medium', 'high', 'low', 'medium', 'high', 'medium', 'medium', 'medium', 'high', 'high'],
    'Traffic_Volume': [5000, 10000, 15000, 12000, 18000, 13000, 17000, 14000, 16000, 15000, 11000, 10000, 9000, 8000, 16000, 18000, 12000, 14000, 11000, 15000, 13000, 12000, 10000, 8000],
    'Temperature_C': [22.0, 24.0, 20.0, 21.0, 23.0, 19.0, 22.5, 25.0, 24.5, 21.5, 20.5, 19.5, 18.0, 17.0, 26.0, 25.5, 23.5, 22.5, 21.5, 20.5, 24.0, 23.0, 22.0, 21.0],
    'Precipitation_mm': [75.0, 80.0, 100.0, 85.0, 90.0, 95.0, 70.0, 65.0, 60.0, 75.0, 85.0, 90.0, 55.0, 50.0, 110.0, 105.0, 100.0, 80.0, 85.0, 75.0, 90.0, 95.0, 100.0, 105.0],
    'Maintenance_History': ['yes', 'yes', 'no', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no']
}

# Convert the dictionary into a pandas DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

# Save the dataset to a CSV file
df.to_csv('road_pavement_extended_dataset.csv', index=False)

# If needed, encode categorical data
df_encoded = pd.get_dummies(df, columns=['Severity_Rating', 'Maintenance_History'])

# Show the encoded dataset
print(df_encoded)

# Save the encoded dataset to a CSV file
df_encoded.to_csv('road_pavement_20_line_dataset_encoded.csv', index=False)


     PCI  Rutting_mm  Fatigue_Cracking_m2  Block_Cracking_m2  \
0   20.0         4.8                  2.0                0.0   
1   12.0         4.4                  1.5                0.0   
2   41.4         8.7                  2.1                0.9   
3   43.6         8.8                  2.4                0.6   
4   50.0         5.5                  3.0                1.0   
5   60.0         6.0                  3.5                0.5   
6   30.0         7.8                  4.0                2.0   
7   70.0         3.0                  1.0                0.4   
8   55.0         9.2                  4.5                1.5   
9   65.0         4.3                  1.2                0.8   
10  40.0         6.1                  2.5                0.3   
11  45.0         7.4                  3.8                1.2   
12  80.0         2.2                  0.5                0.0   
13  85.0         2.5                  0.3                0.1   
14  35.0         8.0                  2.

In [3]:
#70 lines dataset

import pandas as pd

# Extended dataset with 70 rows
data = {
    'PCI': [20, 12, 41.4, 43.6, 50, 60, 30, 70, 55, 65, 40, 45, 80, 85, 35, 25, 90, 75, 52, 62, 
            48, 58, 38, 28, 50, 30, 60, 55, 70, 45, 35, 40, 65, 75, 85, 30, 45, 50, 55, 60,
            70, 80, 90, 20, 30, 40, 50, 60, 45, 55, 65, 75, 85, 95, 50, 55, 60, 65, 70, 75,
            40, 30, 20, 10, 15, 25, 35, 45, 55, 65],
    
    'Rutting_mm': [4.80, 4.4, 8.7, 8.8, 5.5, 6.0, 7.8, 3.0, 9.2, 4.3, 6.1, 7.4, 2.2, 2.5, 8.0, 9.1, 2.0, 4.8, 
                   6.3, 7.0, 5.2, 6.8, 7.5, 8.1, 4.9, 5.7, 6.2, 7.4, 8.5, 9.0, 5.1, 6.4, 7.8, 8.2, 9.3, 4.0, 
                   5.8, 6.5, 7.7, 8.4, 9.2, 3.5, 4.7, 6.0, 7.5, 8.0, 6.9, 5.6, 7.3, 8.1, 9.0, 5.0, 6.2, 7.5, 
                   8.0, 9.2, 4.7, 5.8, 6.9, 7.5, 8.4, 9.3, 5.5, 4.8, 3.6, 7.9, 8.2, 9.0, 6.7, 5.6, 7.1, 8.5],
    
    'Fatigue_Cracking_m2': [2.00, 1.5, 2.1, 2.4, 3.0, 3.5, 4.0, 1.0, 4.5, 1.2, 2.5, 3.8, 0.5, 0.3, 2.8, 3.2, 0.2, 
                            2.9, 3.4, 4.1, 2.3, 3.6, 4.2, 3.9, 2.2, 3.1, 4.0, 1.5, 2.8, 3.9, 1.4, 3.7, 4.1, 2.9, 
                            3.5, 4.0, 2.1, 3.6, 4.4, 1.9, 2.5, 3.8, 4.3, 1.5, 2.0, 3.4, 4.0, 2.5, 3.1, 4.2, 1.9, 
                            2.8, 3.5, 4.1, 1.7, 2.3, 3.6, 4.2, 2.8, 3.7, 4.0, 1.9, 2.8, 3.5, 4.0, 2.1, 3.4, 4.3, 
                            1.8, 2.7],
    
    'Block_Cracking_m2': [0.0, 0.0, 0.9, 0.6, 1.0, 0.5, 2.0, 0.4, 1.5, 0.8, 0.3, 1.2, 0.0, 0.1, 1.0, 1.3, 0.2, 0.7, 
                          1.4, 2.1, 0.6, 0.9, 1.6, 1.8, 0.5, 0.2, 1.1, 1.5, 2.2, 1.7, 0.3, 0.6, 1.2, 1.8, 2.3, 0.4, 
                          0.9, 1.5, 2.0, 0.7, 1.2, 1.6, 2.3, 0.3, 0.8, 1.4, 2.0, 0.5, 1.1, 1.8, 0.6, 1.0, 1.7, 2.2, 
                          0.9, 1.3, 1.9, 2.4, 0.4, 1.0, 1.6, 2.1, 0.7, 1.2, 1.8, 2.5, 0.6, 1.1, 1.7, 2.3, 0.8],
    
    'Longitudinal_Cracking_m2': [26.0, 47.5, 16.9, 18.8, 25.0, 30.0, 20.0, 15.0, 28.0, 22.0, 35.0, 40.0, 10.0, 8.0, 
                                 32.0, 45.0, 12.0, 18.0, 38.0, 42.0, 25.5, 29.0, 31.0, 36.0, 14.0, 20.0, 28.5, 32.5, 
                                 15.5, 22.5, 33.0, 38.5, 20.0, 26.5, 35.0, 15.0, 21.0, 27.5, 34.0, 18.5, 25.0, 30.5, 
                                 37.0, 16.0, 23.5, 28.0, 33.5, 19.0, 26.0, 31.5, 36.5, 17.0, 24.0, 29.5, 34.5, 20.0, 
                                 26.5, 35.5, 16.0, 23.0, 31.0, 36.5, 19.5, 24.5, 29.0, 33.5, 21.5, 28.5, 32.0, 36.0],
    
    'Transverse_Cracking_m2': [0.0, 0.0, 5.0, 3.2, 4.0, 4.5, 2.0, 1.5, 6.0, 2.2, 3.0, 3.5, 1.0, 0.8, 4.8, 5.2, 1.3, 
                               3.1, 4.2, 4.9, 2.5, 3.6, 4.4, 5.0, 1.9, 2.8, 3.7, 4.1, 2.3, 3.4, 4.0, 4.8, 1.7, 2.6, 
                               3.8, 4.3, 1.6, 2.7, 3.9, 4.4, 2.1, 3.2, 4.6, 5.0, 2.4, 3.5, 4.7, 5.3, 2.9, 3.6, 4.5, 
                               5.2, 1.8, 2.9, 3.7, 4.8, 2.2, 3.1, 4.0, 4.5, 2.6, 3.3, 4.2, 4.9, 2.0, 2.7, 3.6, 4.4, 
                               1.8],
    
    'Patching_m2': [118.0, 96.0, 5.2, 3.8, 10.0, 8.0, 15.0, 7.0, 20.0, 9.0, 12.0, 6.5, 4.0, 5.0, 11.0, 14.0, 2.5, 
                    8.5, 13.0, 9.5, 6.0, 7.5, 10.5, 12.5, 4.5, 7.0, 10.0, 13.5, 5.0, 8.5, 12.0, 14.5, 6.0, 9.5, 11.5, 
                    3.0, 6.5, 9.0, 12.0, 4.0, 7.5, 11.0, 14.0, 5.5, 8.0, 10.5, 13.0, 6.5, 9.0, 12.5, 15.0, 7.0, 10.0, 
                    13.5, 15.5, 6.0, 9.5, 11.0, 13.0, 4.5, 8.0, 11.5, 14.0, 5.0, 8.5, 10.0, 12.5, 6.5, 9.0, 12.0, 15.0],
    
    'Potholes_Num': [5, 41, 63.0, 51.0, 30, 20, 40, 10, 35, 15, 25, 45, 5, 7, 55, 60, 3, 18, 22, 38, 12, 28, 33, 
                     42, 14, 21, 35, 45, 10, 17, 28, 36, 50, 58, 62, 11, 23, 39, 51, 65, 72, 7, 16, 24, 33, 40, 9, 19, 
                     29, 38, 52, 61, 67, 12, 25, 36, 49, 58, 10, 22, 34, 47, 53, 6, 18, 30, 43, 56, 15, 27],
    
    'Delamination_m2': [18.0, 13.5, 5.5, 5.7, 7.0, 6.5, 8.5, 4.5, 9.0, 4.0, 6.0, 5.8, 3.0, 3.5, 7.5, 8.0, 2.0, 5.6, 
                        6.7, 7.8, 4.2, 5.9, 7.2, 6.9, 3.4, 4.5, 6.0, 7.5, 2.8, 4.1, 6.4, 7.7, 3.6, 5.1, 6.9, 2.4, 4.3, 
                        5.5, 7.0, 3.8, 5.2, 6.6, 7.8, 3.2, 4.7, 6.1, 7.4, 3.9, 5.3, 6.8, 7.9, 4.1, 5.7, 6.3, 7.6, 3.5, 
                        4.9, 6.2, 7.4, 3.1, 4.6, 6.0, 7.2, 3.7, 4.8, 6.4, 7.6, 4.0, 5.3, 6.8, 7.9, 3.8],
    
    'Severity_Rating': ['medium', 'medium', 'medium', 'medium', 'high', 'high', 'high', 'low', 'high', 'low', 'medium', 
                        'high', 'low', 'low', 'medium', 'high', 'low', 'medium', 'high', 'medium', 'medium', 'medium', 
                        'high', 'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'medium', 'high', 'low', 'medium', 
                        'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'medium', 'high', 'medium', 'low', 'high', 
                        'medium', 'high', 'medium', 'low', 'high', 'medium', 'high', 'low', 'medium', 'high', 'low', 'high', 
                        'medium', 'high', 'low', 'medium', 'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'medium'],
    
    'Traffic_Volume': [5000, 10000, 15000, 12000, 18000, 13000, 17000, 14000, 16000, 15000, 11000, 10000, 9000, 
                       8000, 16000, 18000, 12000, 14000, 11000, 15000, 13000, 12000, 10000, 8000, 6000, 12000, 
                       15000, 18000, 13000, 17000, 14000, 16000, 15000, 11000, 10000, 9000, 8000, 16000, 18000, 
                       12000, 14000, 11000, 15000, 13000, 12000, 10000, 8000, 12000, 15000, 18000, 13000, 17000, 
                       14000, 16000, 15000, 11000, 10000, 9000, 8000, 16000, 18000, 12000, 14000, 11000, 15000, 
                       13000, 12000, 10000, 8000, 6000],
    
    'Temperature_C': [22.0, 24.0, 20.0, 21.0, 23.0, 19.0, 22.5, 25.0, 24.5, 21.5, 20.5, 19.5, 18.0, 17.0, 26.0, 25.5, 
                      23.5, 22.5, 21.5, 20.5, 24.0, 23.0, 22.0, 21.0, 20.5, 23.5, 25.0, 22.0, 21.5, 24.5, 25.5, 23.0, 
                      20.0, 19.5, 18.0, 17.5, 26.0, 25.5, 22.5, 21.5, 24.0, 23.5, 22.0, 21.0, 20.0, 24.5, 25.0, 23.0, 
                      22.5, 21.5, 20.5, 24.0, 23.5, 22.0, 21.0, 20.5, 23.5, 25.0, 22.0, 21.5, 24.5, 25.5, 23.0, 20.0, 
                      19.5, 18.0, 17.5, 26.0, 25.5, 22.5, 21.5],
    
    'Precipitation_mm': [75.0, 80.0, 100.0, 85.0, 90.0, 95.0, 70.0, 65.0, 60.0, 75.0, 85.0, 90.0, 55.0, 50.0, 110.0, 
                         105.0, 100.0, 80.0, 85.0, 75.0, 90.0, 95.0, 100.0, 105.0, 60.0, 85.0, 100.0, 75.0, 80.0, 90.0, 
                         95.0, 70.0, 65.0, 60.0, 55.0, 50.0, 105.0, 110.0, 100.0, 85.0, 90.0, 95.0, 70.0, 65.0, 60.0, 
                         75.0, 85.0, 90.0, 100.0, 105.0, 110.0, 55.0, 50.0, 105.0, 110.0, 100.0, 85.0, 90.0, 95.0, 
                         70.0, 65.0, 60.0, 55.0, 50.0, 105.0, 110.0, 100.0, 85.0, 90.0, 95.0],
    
    'Maintenance_History': ['yes', 'yes', 'no', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 
                             'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 
                             'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 
                             'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 
                             'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no']
}

# Convert the dictionary into a pandas DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

# Save the dataset to a CSV file
df.to_csv('road_pavement_70_rows_dataset.csv', index=False)

# If needed, encode categorical data
df_encoded = pd.get_dummies(df, columns=['Severity_Rating', 'Maintenance_History'])

# Show the encoded dataset
print(df_encoded)

# Save the encoded dataset to a CSV file
df_encoded.to_csv('road_pavement_70_rows_dataset_encoded.csv', index=False)


ValueError: All arrays must be of the same length

In [None]:
data = {
    'PCI': [20, 12, 41.4, 43.6, 50, 60, 30, 70, 55, 65, 40, 45, 80, 85, 35, 25, 90, 75, 52, 62, 
            48, 58, 38, 28, 50, 30, 60, 55, 70, 45, 35, 40, 65, 75, 85, 30, 45, 50, 55, 60,
            70, 80, 90, 20, 30, 40, 50, 60, 45, 55, 65, 75, 85, 95, 50, 55, 60, 65, 70, 75,
            40, 30, 20, 10, 15, 25, 35, 45, 55, 65],

    'Rutting_mm': [4.80, 4.4, 8.7, 8.8, 5.5, 6.0, 7.8, 3.0, 9.2, 4.3, 6.1, 7.4, 2.2, 2.5, 8.0, 9.1, 2.0, 4.8, 
                   6.3, 7.0, 5.2, 6.8, 7.5, 8.1, 4.9, 5.7, 6.2, 7.4, 8.5, 9.0, 5.1, 6.4, 7.8, 8.2, 9.3, 4.0, 
                   5.8, 6.5, 7.7, 8.4, 9.2, 3.5, 4.7, 6.0, 7.5, 8.0, 6.9, 5.6, 7.3, 8.1, 9.0, 5.0, 6.2, 7.5, 
                   8.0, 9.2, 4.7, 5.8, 6.9, 7.5, 8.4, 9.3, 5.5, 4.8, 3.6, 7.9, 8.2, 9.0, 6.7, 5.6, 7.1, 8.5],

    'Fatigue_Cracking_m2': [2.00, 1.5, 2.1, 2.4, 3.0, 3.5, 4.0, 1.0, 4.5, 1.2, 2.5, 3.8, 0.5, 0.3, 2.8, 3.2, 0.2, 
                            2.9, 3.4, 4.1, 2.3, 3.6, 4.2, 3.9, 2.2, 3.1, 4.0, 1.5, 2.8, 3.9, 1.4, 3.7, 4.1, 2.9, 
                            3.5, 4.0, 2.1, 3.6, 4.4, 1.9, 2.5, 3.8, 4.3, 1.5, 2.0, 3.4, 4.0, 2.5, 3.1, 4.2, 1.9, 
                            2.8, 3.5, 4.1, 1.7, 2.3, 3.6, 4.2, 2.8, 3.7, 4.0, 1.9, 2.8, 3.5, 4.0, 2.1, 3.4, 4.3, 
                            1.8, 2.7],

    'Block_Cracking_m2': [0.0, 0.0, 0.9, 0.6, 1.0, 0.5, 2.0, 0.4, 1.5, 0.8, 0.3, 1.2, 0.0, 0.1, 1.0, 1.3, 0.2, 0.7, 
                          1.4, 2.1, 0.6, 0.9, 1.6, 1.8, 0.5, 0.2, 1.1, 1.5, 2.2, 1.7, 0.3, 0.6, 1.2, 1.8, 2.3, 0.4, 
                          0.9, 1.5, 2.0, 0.7, 1.2, 1.6, 2.3, 0.3, 0.8, 1.4, 2.0, 0.5, 1.1, 1.8, 0.6, 1.0, 1.7, 2.2, 
                          0.9, 1.3, 1.9, 2.4, 0.4, 1.0, 1.6, 2.1, 0.7, 1.2, 1.8, 2.5, 0.6, 1.1, 1.7, 2.3, 0.8],

    'Longitudinal_Cracking_m2': [26.0, 47.5, 16.9, 18.8, 25.0, 30.0, 20.0, 15.0, 28.0, 22.0, 35.0, 40.0, 10.0, 8.0, 
                                 32.0, 45.0, 12.0, 18.0, 38.0, 42.0, 25.5, 29.0, 31.0, 36.0, 14.0, 20.0, 28.5, 32.5, 
                                 15.5, 22.5, 33.0, 38.5, 20.0, 26.5, 35.0, 15.0, 21.0, 27.5, 34.0, 18.5, 25.0, 30.5, 
                                 37.0, 16.0, 23.5, 28.0, 33.5, 19.0, 26.0, 31.5, 36.5, 17.0, 24.0, 29.5, 34.5, 20.0, 
                                 26.5, 35.5, 16.0, 23.0, 31.0, 36.5, 19.5, 24.5, 29.0, 33.5, 21.5, 28.5, 32.0, 36.0],


# 'Transverse_Cracking_m2': [0.0, 0.0, 5.0, 3.2, 4.0, 4.5, 2.0, 1.5, 6.0, 2.2, 3.0, 3.5, 1.0, 0.8, 4.8, 5.2, 1.3, 
#                                3.1, 4.2, 4.9, 2.5, 3.6, 4.4, 5.0, 1.9, 2.8, 3.7, 4.1, 2.3, 3.4, 4.0, 4.8, 1.7, 2.6, 
#                                3.8, 4.3, 1.6, 2.7, 3.9, 4.4, 2.1, 3.2, 4.6, 5.0, 2.4, 3.5, 4.7, 5.3, 2.9, 3.6, 4.5, 
#                                5.2, 1.8, 2.9, 3.7, 4.8, 2.2, 3.1, 4.0, 4.5, 2.6, 3.3, 4.2, 4.9, 2.0, 2.7, 3.6, 4.4, 
#                                1.8],



'Patching_m2': [118.0, 96.0, 5.2, 3.8, 10.0, 8.0, 15.0, 7.0, 20.0, 9.0, 12.0, 6.5, 4.0, 5.0, 11.0, 14.0, 2.5, 
                    8.5, 13.0, 9.5, 6.0, 7.5, 10.5, 12.5, 4.5, 7.0, 10.0, 13.5, 5.0, 8.5, 12.0, 14.5, 6.0, 9.5, 11.5, 
                    3.0, 6.5, 9.0, 12.0, 4.0, 7.5, 11.0, 14.0, 5.5, 8.0, 10.5, 13.0, 6.5, 9.0, 12.5, 15.0, 7.0, 10.0, 
                    13.5, 15.5, 6.0, 9.5, 11.0, 13.0, 4.5, 8.0, 11.5, 14.0, 5.0, 8.5, 10.0, 12.5, 6.5, 9.0, 12.0, 15.0],

    'Potholes_Num': [5, 41, 63.0, 51.0, 30, 20, 40, 10, 35, 15, 25, 45, 5, 7, 55, 60, 3, 18, 22, 38, 12, 28, 33, 
                     42, 14, 21, 35, 45, 10, 17, 28, 36, 50, 58, 62, 11, 23, 39, 51, 65, 72, 7, 16, 24, 33, 40, 9, 19, 
                     29, 38, 52, 61, 67, 12, 25, 36, 49, 58, 10, 22, 34, 47, 53, 6, 18, 30, 43, 56, 15, 27],

    'Delamination_m2': [18.0, 13.5, 5.5, 5.7, 7.0, 6.5, 8.5, 4.5, 9.0, 4.0, 6.0, 5.8, 3.0, 3.5, 7.5, 8.0, 2.0, 5.6, 
                        6.7, 7.8, 4.2, 5.9, 7.2, 6.9, 3.4, 4.5, 6.0, 7.5, 2.8, 4.1, 6.4, 7.7, 3.6, 5.1, 6.9, 2.4, 4.3, 
                        5.5, 7.0, 3.8, 5.2, 6.6, 7.8, 3.2, 4.7, 6.1, 7.4, 3.9, 5.3, 6.8, 7.9, 4.1, 5.7, 6.3, 7.6, 3.5, 
                        4.9, 6.2, 7.4, 3.1, 4.6, 6.0, 7.2, 3.7, 4.8, 6.4, 7.6, 4.0, 5.3, 6.8, 7.9, 3.8],

    'Severity_Rating': ['medium', 'medium', 'medium', 'medium', 'high', 'high', 'high', 'low', 'high', 'low', 'medium', 
                        'high', 'low', 'low', 'medium', 'high', 'low', 'medium', 'high', 'medium', 'medium', 'medium', 
                        'high', 'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'medium', 'high', 'low', 'medium', 
                        'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'medium', 'high', 'medium', 'low', 'high', 
                        'medium', 'high', 'medium', 'low', 'high', 'medium', 'high', 'low', 'medium', 'high', 'low', 'high', 
                        'medium', 'high', 'low', 'medium', 'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'medium'],

    'Traffic_Volume': [5000, 10000, 15000, 12000, 18000, 13000, 17000, 14000, 16000, 15000, 11000, 10000, 9000, 
                       8000, 16000, 18000, 12000, 14000, 11000, 15000, 13000, 12000, 10000, 8000, 6000, 12000, 
                       15000, 18000, 13000, 17000, 14000, 16000, 15000, 11000, 10000, 9000, 8000, 16000, 18000, 
                       12000, 14000, 11000, 15000, 13000, 12000, 10000, 8000, 12000, 15000, 18000, 13000, 17000, 
                       14000, 16000, 15000, 11000, 10000, 9000, 8000, 16000, 18000, 12000, 14000, 11000, 15000, 
                       13000, 12000, 10000, 8000, 6000],

    'Temperature_C': [22.0, 24.0, 20.0, 21.0, 23.0, 19.0, 22.5, 25.0, 24.5, 21.5, 20.5, 19.5, 18.0, 17.0, 26.0, 25.5, 
                      23.5, 22.5, 21.5, 20.5, 24.0, 23.0, 22.0, 21.0, 20.5, 23.5, 25.0, 22.0, 21.5, 24.5, 25.5, 23.0, 
                      20.0, 19.5, 18.0, 17.5, 26.0, 25.5, 22.5, 21.5, 24.0, 23.5, 22.0, 21.0, 20.0, 24.5, 25.0, 23.0, 
                      22.5, 21.5, 20.5, 24.0, 23.5, 22.0, 21.0, 20.5, 23.5, 25.0, 22.0, 21.5, 24.5, 25.5, 23.0, 20.0, 
                      19.5, 18.0, 17.5, 26.0, 25.5, 22.5, 21.5],









In [6]:
import pandas as pd

# Data with 70 rows
data = {
    'PCI': [20, 12, 41.4, 43.6, 50, 60, 30, 70, 55, 65, 40, 45, 80, 85, 35, 25, 90, 75, 52, 62, 
            48, 58, 38, 28, 50, 30, 60, 55, 70, 45, 35, 40, 65, 75, 85, 30, 45, 50, 55, 60,
            70, 80, 90, 20, 30, 40, 50, 60, 45, 55, 65, 75, 85, 95, 50, 55, 60, 65, 70, 75,
            40, 30, 20, 10, 15, 25, 35, 45, 55, 65],

    'Rutting_mm': [4.80, 4.4, 8.7, 8.8, 5.5, 6.0, 7.8, 3.0, 9.2, 4.3, 6.1, 7.4, 2.2, 2.5, 8.0, 9.1, 2.0, 4.8, 
                   6.3, 7.0, 5.2, 6.8, 7.5, 8.1, 4.9, 5.7, 6.2, 7.4, 8.5, 9.0, 5.1, 6.4, 7.8, 8.2, 9.3, 4.0, 
                   5.8, 6.5, 7.7, 8.4, 9.2, 3.5, 4.7, 6.0, 7.5, 8.0, 6.9, 5.6, 7.3, 8.1, 9.0, 5.0, 6.2, 7.5, 
                   8.0, 9.2, 4.7, 5.8, 6.9, 7.5, 8.4, 9.3, 5.5, 4.8, 3.6, 7.9, 8.2, 9.0, 6.7, 5.6, 7.1, 8.5],

    'Fatigue_Cracking_m2': [2.00, 1.5, 2.1, 2.4, 3.0, 3.5, 4.0, 1.0, 4.5, 1.2, 2.5, 3.8, 0.5, 0.3, 2.8, 3.2, 0.2, 
                            2.9, 3.4, 4.1, 2.3, 3.6, 4.2, 3.9, 2.2, 3.1, 4.0, 1.5, 2.8, 3.9, 1.4, 3.7, 4.1, 2.9, 
                            3.5, 4.0, 2.1, 3.6, 4.4, 1.9, 2.5, 3.8, 4.3, 1.5, 2.0, 3.4, 4.0, 2.5, 3.1, 4.2, 1.9, 
                            2.8, 3.5, 4.1, 1.7, 2.3, 3.6, 4.2, 2.8, 3.7, 4.0, 1.9, 2.8, 3.5, 4.0, 2.1, 3.4, 4.3, 
                            1.8, 2.7],

    'Block_Cracking_m2': [0.0, 0.0, 0.9, 0.6, 1.0, 0.5, 2.0, 0.4, 1.5, 0.8, 0.3, 1.2, 0.0, 0.1, 1.0, 1.3, 0.2, 0.7, 
                          1.4, 2.1, 0.6, 0.9, 1.6, 1.8, 0.5, 0.2, 1.1, 1.5, 2.2, 1.7, 0.3, 0.6, 1.2, 1.8, 2.3, 0.4, 
                          0.9, 1.5, 2.0, 0.7, 1.2, 1.6, 2.3, 0.3, 0.8, 1.4, 2.0, 0.5, 1.1, 1.8, 0.6, 1.0, 1.7, 2.2, 
                          0.9, 1.3, 1.9, 2.4, 0.4, 1.0, 1.6, 2.1, 0.7, 1.2, 1.8, 2.5, 0.6, 1.1, 1.7, 2.3, 0.8],

    'Longitudinal_Cracking_m2': [26.0, 47.5, 16.9, 18.8, 25.0, 30.0, 20.0, 15.0, 28.0, 22.0, 35.0, 40.0, 10.0, 8.0, 
                                 32.0, 45.0, 12.0, 18.0, 38.0, 42.0, 25.5, 29.0, 31.0, 36.0, 14.0, 20.0, 28.5, 32.5, 
                                 15.5, 22.5, 33.0, 38.5, 20.0, 26.5, 35.0, 15.0, 21.0, 27.5, 34.0, 18.5, 25.0, 30.5, 
                                 37.0, 16.0, 23.5, 28.0, 33.5, 19.0, 26.0, 31.5, 36.5, 17.0, 24.0, 29.5, 34.5, 20.0, 
                                 26.5, 35.5, 16.0, 23.0, 31.0, 36.5, 19.5, 24.5, 29.0, 33.5, 21.5, 28.5, 32.0, 36.0],

    'Transverse_Cracking_m2': [0.0, 0.0, 5.0, 3.2, 4.0, 4.5, 2.0, 1.5, 6.0, 2.2, 3.0, 3.5, 1.0, 0.8, 4.8, 5.2, 1.3, 
                               3.1, 4.2, 4.9, 2.5, 3.6, 4.4, 5.0, 1.9, 2.8, 3.7, 4.1, 2.3, 3.4, 4.0, 4.8, 1.7, 2.6, 
                               3.8, 4.3, 1.6, 2.7, 3.9, 4.4, 2.1, 3.2, 4.6, 5.0, 2.4, 3.5, 4.7, 5.3, 2.9, 3.6, 4.5, 
                               5.2, 1.8, 2.9, 3.7, 4.8, 2.2, 3.1, 4.0, 4.5, 2.6, 3.3, 4.2, 4.9, 2.0, 2.7, 3.6, 4.4, 
                               1.8],

    'Patching_m2': [118.0, 96.0, 5.2, 3.8, 10.0, 8.0, 15.0, 7.0, 20.0, 9.0, 12.0, 6.5, 4.0, 5.0, 11.0, 14.0, 2.5, 
                    8.5, 13.0, 9.5, 6.0, 7.5, 10.5, 12.5, 4.5, 7.0, 10.0, 13.5, 5.0, 8.5, 12.0, 14.5, 6.0, 9.5, 11.5, 
                    3.0, 6.5, 9.0, 12.0, 4.0, 7.5, 11.0, 14.0, 5.5, 8.0, 10.5, 13.0, 6.5, 9.0, 12.5, 15.0, 7.0, 10.0, 
                    13.5, 15.5, 6.0, 9.5, 11.0, 13.0, 4.5, 8.0, 11.5, 14.0, 5.0, 8.5, 10.0, 12.5, 6.5, 9.0, 12.0, 15.0],

    'Potholes_Num': [5, 41, 63.0, 51.0, 30, 20, 40, 10, 35, 15, 25, 45, 5, 7, 55, 60, 3, 18, 22, 38, 12, 28, 33, 
                     42, 14, 21, 35, 45, 10, 17, 28, 36, 50, 58, 62, 11, 23, 39, 51, 65, 72, 7, 16, 24, 33, 40, 9, 19, 
                     29, 38, 52, 61, 67, 12, 25, 36, 49, 58, 10, 22, 34, 47, 53, 6, 18, 30, 43, 56, 15, 27],

    'Delamination_m2': [18.0, 13.5, 5.5, 5.7, 7.0, 6.5, 8.5, 4.5, 9.0, 4.0, 6.0, 5.8, 3.0, 3.5, 7.5, 8.0, 2.0, 5.6, 
                        6.7, 7.8, 4.2, 5.9, 7.2, 6.9, 3.4, 4.5, 6.0, 7.5, 2.8, 4.1, 6.4, 7.7, 3.6, 5.1, 6.9, 2.4, 4.3, 
                        5.5, 7.0, 3.8, 5.2, 6.6, 7.8, 3.2, 4.7, 6.1, 7.4, 3.9, 5.3, 6.8, 7.9, 4.1, 5.7, 6.3, 7.6, 3.5, 
                        4.9, 6.2, 7.4, 3.1, 4.6, 6.0, 7.2, 3.7, 4.8, 6.4, 7.6, 4.0, 5.3, 6.8, 7.9, 3.8],

    'Severity_Rating': ['medium', 'medium', 'medium', 'medium', 'high', 'high', 'high', 'low', 'high', 'low', 'medium', 
                        'high', 'low', 'low', 'medium', 'high', 'low', 'medium', 'high', 'medium', 'medium', 'medium', 
                        'high', 'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'medium', 'high', 'low', 'medium', 
                        'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'medium', 'high', 'medium', 'low', 'high', 
                        'medium', 'high', 'medium', 'low', 'high', 'medium', 'high', 'low', 'medium', 'high', 'low', 'high', 
                        'medium', 'high', 'low', 'medium', 'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'medium'],

    'Traffic_Volume': [5000, 10000, 15000, 12000, 18000, 13000, 17000, 14000, 16000, 15000, 11000, 10000, 9000, 
                       8000, 16000, 18000, 12000, 14000, 11000, 15000, 13000, 12000, 10000, 8000, 6000, 12000, 
                       15000, 18000, 13000, 17000, 14000, 16000, 15000, 11000, 10000, 9000, 8000, 16000, 18000, 
                       12000, 14000, 11000, 15000, 13000, 12000, 10000, 8000, 12000, 15000, 18000, 13000, 17000, 
                       14000, 16000, 15000, 11000, 10000, 9000, 8000, 16000, 18000, 12000, 14000, 11000, 15000, 
                       13000, 12000, 10000, 8000, 6000],

    'Temperature_C': [22.0, 24.0, 20.0, 21.0, 23.0, 19.0, 22.5, 25.0, 24.5, 21.5, 20.5, 19.5, 18.0, 17.0, 26.0, 25.5, 
                      23.5, 22.5, 21.5, 20.5, 24.0, 23.0, 22.0, 21.0, 20.5, 23.5, 25.0, 22.0, 21.5, 24.5, 25.5, 23.0, 
                      20.0, 19.5, 18.0, 17.5, 26.0, 25.5, 22.5, 21.5, 24.0, 23.5, 22.0, 21.0, 20.0, 24.5, 25.0, 23.0, 
                      22.5, 21.5, 20.5, 24.0, 23.5, 22.0, 21.0, 20.5, 23.5, 25.0, 22.0, 21.5, 24.5, 25.5, 23.0, 20.0, 
                      19.5, 18.0, 17.5, 26.0, 25.5, 22.5, 21.5],

    'Precipitation_mm': [75.0, 80.0, 100.0, 85.0, 90.0, 95.0, 70.0, 65.0, 60.0, 75.0, 85.0, 90.0, 55.0, 50.0, 110.0, 
                         105.0, 100.0, 80.0, 85.0, 75.0, 90.0, 95.0, 100.0, 105.0, 60.0, 85.0, 100.0, 75.0, 80.0, 90.0, 
                         95.0, 70.0, 65.0, 60.0, 55.0, 50.0, 105.0, 110.0, 100.0, 85.0, 90.0, 95.0, 70.0, 65.0, 60.0, 
                         75.0, 85.0, 90.0, 100.0, 105.0, 110.0, 55.0, 50.0, 105.0, 110.0, 100.0, 85.0, 90.0, 95.0, 
                         70.0, 65.0, 60.0, 55.0, 50.0, 105.0, 110.0, 100.0, 85.0, 90.0, 95.0],

    'Maintenance_History': ['yes', 'yes', 'no', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 
                             'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 
                             'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 
                             'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 
                             'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no', 'yes', 'no']
}

for i in data:
    print(i, len(data[i]))

# Convert the dictionary into a pandas DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

# Save the dataset to a CSV file
df.to_csv('road_pavement_70_rows_dataset.csv', index=False)

# If needed, encode categorical data
df_encoded = pd.get_dummies(df, columns=['Severity_Rating', 'Maintenance_History'])

# Show the encoded dataset
print(df_encoded)

# Save the encoded dataset to a CSV file
df_encoded.to_csv('road_pavement_70_rows_dataset_encoded.csv', index=False)


PCI 70
Rutting_mm 72
Fatigue_Cracking_m2 70
Block_Cracking_m2 71
Longitudinal_Cracking_m2 70
Transverse_Cracking_m2 69
Patching_m2 71
Potholes_Num 70
Delamination_m2 72
Severity_Rating 70
Traffic_Volume 70
Temperature_C 71
Precipitation_mm 70
Maintenance_History 70


ValueError: All arrays must be of the same length

In [8]:
data = {
    'PCI': [20, 12, 41.4, 43.6, 50, 60, 30, 70, 55, 65, 40, 45, 80, 85, 35, 25, 90, 75, 52, 62, 
            48, 58, 38, 28, 50, 30, 60, 55, 70, 45, 35, 40, 65, 75, 85, 30, 45, 50, 55, 60,
            70, 80, 90, 20, 30, 40, 50, 60, 45, 55, 65, 75, 85, 95, 50, 55, 60, 65, 70, 75,
            40, 30, 20, 10, 15, 25, 35, 45, 55, 65],

    'Rutting_mm': [4.80, 4.4, 8.7, 8.8, 5.5, 6.0, 7.8, 3.0, 9.2, 4.3, 6.1, 7.4, 2.2, 2.5, 8.0, 9.1, 2.0, 4.8, 
                   6.3, 7.0, 5.2, 6.8, 7.5, 8.1, 4.9, 5.7, 6.2, 7.4, 8.5, 9.0, 5.1, 6.4, 7.8, 8.2, 9.3, 4.0, 
                   5.8, 6.5, 7.7, 8.4, 9.2, 3.5, 4.7, 6.0, 7.5, 8.0, 6.9, 5.6, 7.3, 8.1, 9.0, 5.0, 6.2, 7.5, 
                   8.0, 9.2, 4.7, 5.8, 6.9, 7.5, 8.4, 9.3, 5.5, 4.8, 3.6, 7.9, 8.2, 9.0, 6.7, 5.6], #, 7.1, 8.5

    'Fatigue_Cracking_m2': [2.00, 1.5, 2.1, 2.4, 3.0, 3.5, 4.0, 1.0, 4.5, 1.2, 2.5, 3.8, 0.5, 0.3, 2.8, 3.2, 0.2, 
                            2.9, 3.4, 4.1, 2.3, 3.6, 4.2, 3.9, 2.2, 3.1, 4.0, 1.5, 2.8, 3.9, 1.4, 3.7, 4.1, 2.9, 
                            3.5, 4.0, 2.1, 3.6, 4.4, 1.9, 2.5, 3.8, 4.3, 1.5, 2.0, 3.4, 4.0, 2.5, 3.1, 4.2, 1.9, 
                            2.8, 3.5, 4.1, 1.7, 2.3, 3.6, 4.2, 2.8, 3.7, 4.0, 1.9, 2.8, 3.5, 4.0, 2.1, 3.4, 4.3, 
                            1.8, 2.7],

    'Block_Cracking_m2': [0.0, 0.0, 0.9, 0.6, 1.0, 0.5, 2.0, 0.4, 1.5, 0.8, 0.3, 1.2, 0.0, 0.1, 1.0, 1.3, 0.2, 0.7, 
                          1.4, 2.1, 0.6, 0.9, 1.6, 1.8, 0.5, 0.2, 1.1, 1.5, 2.2, 1.7, 0.3, 0.6, 1.2, 1.8, 2.3, 0.4, 
                          0.9, 1.5, 2.0, 0.7, 1.2, 1.6, 2.3, 0.3, 0.8, 1.4, 2.0, 0.5, 1.1, 1.8, 0.6, 1.0, 1.7, 2.2, 
                          0.9, 1.3, 1.9, 2.4, 0.4, 1.0, 1.6, 2.1, 0.7, 1.2, 1.8, 2.5, 0.6, 1.1, 1.7, 2.3],#, 0.8

    'Longitudinal_Cracking_m2': [26.0, 47.5, 16.9, 18.8, 25.0, 30.0, 20.0, 15.0, 28.0, 22.0, 35.0, 40.0, 10.0, 8.0, 
                                 32.0, 45.0, 12.0, 18.0, 38.0, 42.0, 25.5, 29.0, 31.0, 36.0, 14.0, 20.0, 28.5, 32.5, 
                                 15.5, 22.5, 33.0, 38.5, 20.0, 26.5, 35.0, 15.0, 21.0, 27.5, 34.0, 18.5, 25.0, 30.5, 
                                 37.0, 16.0, 23.5, 28.0, 33.5, 19.0, 26.0, 31.5, 36.5, 17.0, 24.0, 29.5, 34.5, 20.0, 
                                 26.5, 35.5, 16.0, 23.0, 31.0, 36.5, 19.5, 24.5, 29.0, 33.5, 21.5, 28.5, 32.0, 36.0]
}

for i in data:
    print(i, len(data[i]))

PCI 70
Rutting_mm 70
Fatigue_Cracking_m2 70
Block_Cracking_m2 70
Longitudinal_Cracking_m2 70
