In [1]:
import json
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import plotly.graph_objects as go
import kaleido
import plotly.io as pio
import pyvin
from scipy.optimize import curve_fit
import cv2
import sys

In [2]:
# Read the test-set results (a JSON file) into a DataFrame; every later cell
# works from results_df.
results_path = "../outputs/Test_Data_Results.json"
results_df = pd.read_json(results_path)

In [25]:
# Make the outputs folder that the report figures are written to.
# exist_ok=True creates the directory (with any missing parents) and does
# nothing when it is already there, which avoids the gap between a separate
# os.path.exists() check and the os.makedirs() call.
output_path = r'../tmp/report/'
os.makedirs(output_path, exist_ok=True)

In [26]:
# ADD MAKE, MODEL, YEAR Columns
# Decode every VIN with pyvin and store its make, model and model year as new
# columns of results_df. Rows without a VIN get the placeholder string "NA"
# in all three columns.

# Create empty lists to store the extracted information
makes = []
models = []
years = []

# Iterate over each VIN number
for vin in results_df['VIN']:
    # pd.notna() is False for both None and NaN. A missing VIN may reach this
    # loop as either one depending on how pandas parsed the column, and a NaN
    # would pass a plain `vin != None` test and be handed to the decoder.
    if pd.notna(vin):
        # Decode the VIN number.
        # NOTE(review): pyvin.PASS presumably makes the decoder tolerate
        # errors instead of raising - confirm against the pyvin docs.
        vin_info = pyvin.VIN(vin, error_handling=pyvin.PASS)

        # Extract the make, model, and year from the decoded VIN
        make = vin_info.Make
        model = vin_info.Model
        year = vin_info.ModelYear

        # Append the extracted information to the respective lists
        makes.append(make)
        models.append(model)
        years.append(year)
    else:
        # Keep the lists aligned with the DataFrame rows
        makes.append("NA")
        models.append("NA")
        years.append("NA")

# Add the make, model, and year columns to the DataFrame
results_df['Make'] = makes
results_df['Model'] = models
results_df['Year'] = years


In [4]:
# FIGURE 3
# Scatter of "was the prediction correct" against the minimum per-digit
# confidence, overlaid with a fitted curve, saved as final_report_Fig3.png.
#
# NOTE: this cell adds the 'match' and 'min_value' columns to results_df and
# drops the rows that have no digit confidences, so every cell that runs
# after it works on the reduced frame.

# 1 where the predicted reading equals the verified reading, 0 otherwise
results_df['match'] = (results_df['pred'] == results_df['VERIFIED_ODOMETER_READING']).astype(int)


def _min_confidence(confs):
    """Return the smallest value in ``confs``, or None when there is none.

    An empty sequence and an entry that has no length at all (None / NaN)
    both yield None, so such rows become NaN in 'min_value' and are removed
    by the dropna() below instead of raising inside ``len()``.
    """
    try:
        count = len(confs)
    except TypeError:
        # Not a sequence: no digit confidences recorded for this row
        return None
    return min(confs) if count > 0 else None


# Find minimum value in the column of lists
results_df['min_value'] = results_df['digits_conf'].apply(_min_confidence)

# Drop rows with NaN values in 'min_value' column
results_df = results_df.dropna(subset=['min_value'])


# Define the binomial function
def binomial_function(x, p):
    """Evaluate ``p**x * (1 - p)**(1 - x)`` for the curve fit.

    x: value(s) at which to evaluate (here the minimum digit confidence).
    p: the single parameter estimated by curve_fit.
    """
    return p**x * (1-p)**(1-x)


# Fit the binomial function to the data (curve_fit is imported in the first
# cell of the notebook).
# NOTE(review): no bounds are passed, so nothing keeps p inside [0, 1] during
# the optimisation - confirm the fitted popt is sensible.
popt, pcov = curve_fit(binomial_function, results_df['min_value'], results_df['match'], p0=0.5)

# Generate smooth values for plotting
smooth_values = np.linspace(results_df['min_value'].min(), results_df['min_value'].max(), 100)
smooth_match = binomial_function(smooth_values, *popt)

# Jittering for better visibility: 'match' only takes the values 0 and 1, so
# the points would otherwise sit on two lines. A fixed seed makes the saved
# figure identical on every run.
rng = np.random.default_rng(0)
jitter = rng.uniform(low=-0.09, high=0.09, size=len(results_df))
jittered_match = results_df['match'] + jitter

# Increase the size of the plot
fig3, ax3 = plt.subplots(figsize=(9, 4))

# Scatter plot
ax3.scatter(results_df['min_value'], jittered_match, s=2, alpha=1, color='gray', label='Data Points')

# Plot the smoothing line
ax3.plot(smooth_values, smooth_match, color='red', label='Smoothing Line', linewidth=3, alpha=0.5)

# Set labels and title
ax3.set_xlabel('Minimum Digit Confidence', fontsize=10)
ax3.set_ylabel('Prediction', fontsize=10)

# Customize y-axis labels
ax3.set_yticks([0, 1])
ax3.set_yticklabels(['Incorrect', 'Correct'])

# tight_layout has to run after the artists and labels exist; calling it on
# the empty figure does not adjust anything for the saved image.
fig3.tight_layout()

# Save the figure as a PNG image in the report output folder
fig3.savefig(os.path.join(output_path, 'final_report_Fig3.png'), dpi=300)

# Close the figure
plt.close(fig3)

In [3]:
# Figure 5 - SANKEY DIAGRAM
# make_sankey_plot is not defined in this notebook; it is presumably provided
# by the project's plot module under ../src, so that directory must be on
# sys.path before the import. The membership test keeps the cell idempotent:
# re-running it does not append the same entry again.
if "../src" not in sys.path:
    sys.path.append("../src")
# Import only the name that is used instead of `from plot import *`, so it is
# clear where it comes from and no other notebook name is silently replaced.
from plot import make_sankey_plot

# The two numeric arguments are passed through unchanged; their meaning is
# defined by make_sankey_plot.
fig = make_sankey_plot(results_df, 0.75, 0.85)
fig.update_layout(
    width=1200,   # Specify the width of the image in pixels
    height=600,  # Specify the height of the image in pixels
    title_text="",
    font=dict(size=15, color='black'),
)
# Save the figure as a PNG image in the report output folder
fig.write_image(os.path.join(output_path, 'final_report_Fig5.png'), engine='kaleido')

In [29]:
# Figure 6 - trend lines
# Order the rows by model year, then take the mean confidence per year and
# smooth it with a centred rolling mean over 5 consecutive year groups.
results_df = results_df.sort_values(by='Year')
conf_by_year = results_df.groupby('Year')
yearly_avg_odo = conf_by_year['odo_conf'].mean().rolling(window=5, center=True).mean()
yearly_avg_digit = conf_by_year['value_conf'].mean().rolling(window=5, center=True).mean()

In [30]:
# Figure 6
def _plot_confidence_by_year(frame, conf_column, yearly_trend, y_label, file_name):
    """Scatter one confidence column against 'Year' and save it as a PNG.

    frame: DataFrame with a 'Year' column and the column named by conf_column.
    conf_column: name of the confidence column drawn as gray points.
    yearly_trend: Series indexed by year, drawn as the dashed red line.
    y_label: text for the y axis.
    file_name: name of the PNG written inside output_path.

    Returns the (figure, axes) pair; the figure is already closed.
    """
    fig, ax = plt.subplots(figsize=(6, 4))

    ax.scatter(frame['Year'], frame[conf_column], s=10, c='gray', alpha=0.5)
    ax.plot(yearly_trend.index, yearly_trend.values, color='red', linestyle='--', label='Yearly Average')
    ax.set_xlabel('Manufacture Year', fontsize=8, fontweight='bold')
    ax.set_ylabel(y_label, fontsize=8, fontweight='bold')

    # Adjust x-axis ticks and labels: one major tick every 5 units
    ax.xaxis.set_major_locator(ticker.MultipleLocator(base=5))

    fig.tight_layout()

    # Save the figure as a PNG image in the report output folder
    fig.savefig(os.path.join(output_path, file_name), dpi=300)

    # Close the figure
    plt.close(fig)
    return fig, ax


# Figure 6 - Plot 1: odometer confidence with the odometer trend line.
# yearly_avg_odo is the series computed from 'odo_conf', so it belongs on
# this plot (the two trend series were previously drawn on each other's plot).
fig1, ax1 = _plot_confidence_by_year(
    results_df, 'odo_conf', yearly_avg_odo,
    'Odometer Confidence', 'final_report_Fig6a.png',
)

# Figure 6 - Plot 2: digit confidence with the trend computed from 'value_conf'.
fig2, ax2 = _plot_confidence_by_year(
    results_df, 'value_conf', yearly_avg_digit,
    'Digit Confidence', 'final_report_Fig6b.png',
)