In [1]:
# Import dependencies
import pandas as pd
# Read in file
df = pd.read_csv('./resources/recalls.csv')

In [2]:
# Information of dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26590 entries, 0 to 26589
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Report Received Date  26590 non-null  object 
 1   NHTSA ID              26590 non-null  object 
 2   Recall Link           26590 non-null  object 
 3   Manufacturer          26590 non-null  object 
 4   Subject               26590 non-null  object 
 5   Component             26590 non-null  object 
 6   Mfr Campaign Number   26561 non-null  object 
 7   Recall Type           26590 non-null  object 
 8   Potentially Affected  26548 non-null  float64
 9   Recall Description    24189 non-null  object 
 10  Consequence Summary   21702 non-null  object 
 11  Corrective Action     24202 non-null  object 
dtypes: float64(1), object(11)
memory usage: 2.4+ MB


In [3]:
# Find max length of all columns in df with 'object' dtype (string)
# Used to provide VARCHAR column limits when creating table schema
columns = list(df.select_dtypes(['object']).columns)
for column in columns:
    print(f'{column}: {df[column].str.len().max()}')

Report Received Date: 10
NHTSA ID: 9
Recall Link: 62
Manufacturer: 40
Subject: 50
Component: 28
Mfr Campaign Number: 20.0
Recall Type: 10
Recall Description: 1982.0
Consequence Summary: 758.0
Corrective Action: 1672.0


In [4]:
# Temporary dataframe to keep any rows with a null value (data exploration)
df1 = df[df.isna().any(axis=1)]

In [5]:
df1.head()

Unnamed: 0,Report Received Date,NHTSA ID,Recall Link,Manufacturer,Subject,Component,Mfr Campaign Number,Recall Type,Potentially Affected,Recall Description,Consequence Summary,Corrective Action
200,10/14/2022,22E088000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,SAF-Holland,Loose Brake Pad Retaining Clip May Damage Tire,"SERVICE BRAKES, AIR",,Equipment,10591.0,SAF-Holland is recalling certain ULX40 Suspens...,A retaining clip or bracket that detaches can ...,Dealers will inspect and repair the spring bra...
341,08/26/2022,22V647000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Hino Motors Sales U.S.A., Inc.",Electrical Short Circuit May Cause Engine Stall,ENGINE AND ENGINE COOLING,,Vehicle,7.0,"Hino Motors Sales U.S.A., Inc. (Hino) is recal...",An unexpected engine shut down can increase th...,"Dealers will replace the ECM, free of charge. ..."
369,08/13/2022,22T013000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Zhaoqing Junhong Co., Ltd.",Tire Belt May Separate/ FMVSS 139,TIRES,NR (Not Reported),Tire,,"Zhaoqing Junhong Co., Ltd. (Zhaoquing) is reca...",Belt separation may lead to loss of vehicle co...,Dealers will inspect times to confirm the prod...
606,06/07/2022,22C002000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Clek, Inc.",Installation Diagram Missing/FMVSS 213,CHILD SEAT,,Child Seat,11123.0,"Clek, Inc. (Clek) is recalling certain Olli bo...",A missing installation label may result in an ...,"Clek will mail owners a new label, free of cha..."
1056,12/23/2021,21E104000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Crysteel Manufacturing Inc,Hydraulic Cylinder Failure,EQUIPMENT,,Equipment,261.0,Crysteel Manufacturing Inc. (Crysteel) is reca...,Cylinder detachment can contact the cab and ca...,"Crysteel will replace the cylinder, free of ch..."


In [6]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4996 entries, 200 to 26589
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Report Received Date  4996 non-null   object 
 1   NHTSA ID              4996 non-null   object 
 2   Recall Link           4996 non-null   object 
 3   Manufacturer          4996 non-null   object 
 4   Subject               4996 non-null   object 
 5   Component             4996 non-null   object 
 6   Mfr Campaign Number   4967 non-null   object 
 7   Recall Type           4996 non-null   object 
 8   Potentially Affected  4954 non-null   float64
 9   Recall Description    2595 non-null   object 
 10  Consequence Summary   108 non-null    object 
 11  Corrective Action     2608 non-null   object 
dtypes: float64(1), object(11)
memory usage: 507.4+ KB


In [7]:
# Drop rows with null values in specific column
df.dropna(subset=['Mfr Campaign Number'], inplace=True)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 26561 entries, 0 to 26589
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Report Received Date  26561 non-null  object 
 1   NHTSA ID              26561 non-null  object 
 2   Recall Link           26561 non-null  object 
 3   Manufacturer          26561 non-null  object 
 4   Subject               26561 non-null  object 
 5   Component             26561 non-null  object 
 6   Mfr Campaign Number   26561 non-null  object 
 7   Recall Type           26561 non-null  object 
 8   Potentially Affected  26519 non-null  float64
 9   Recall Description    24160 non-null  object 
 10  Consequence Summary   21673 non-null  object 
 11  Corrective Action     24173 non-null  object 
dtypes: float64(1), object(11)
memory usage: 2.6+ MB


In [9]:
# Continuation of dropping rows with null values
df.dropna(subset=['Potentially Affected'], inplace=True)
df.dropna(subset=['Recall Description'], inplace=True)
df.dropna(subset=['Consequence Summary'], inplace=True)
df.dropna(subset=['Corrective Action'], inplace=True)

In [10]:
# Confirming all df values are not null
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21594 entries, 0 to 26586
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Report Received Date  21594 non-null  object 
 1   NHTSA ID              21594 non-null  object 
 2   Recall Link           21594 non-null  object 
 3   Manufacturer          21594 non-null  object 
 4   Subject               21594 non-null  object 
 5   Component             21594 non-null  object 
 6   Mfr Campaign Number   21594 non-null  object 
 7   Recall Type           21594 non-null  object 
 8   Potentially Affected  21594 non-null  float64
 9   Recall Description    21594 non-null  object 
 10  Consequence Summary   21594 non-null  object 
 11  Corrective Action     21594 non-null  object 
dtypes: float64(1), object(11)
memory usage: 2.1+ MB


In [11]:
list(df.columns)

['Report Received Date',
 'NHTSA ID',
 'Recall Link',
 'Manufacturer',
 'Subject',
 'Component',
 'Mfr Campaign Number',
 'Recall Type',
 'Potentially Affected',
 'Recall Description',
 'Consequence Summary',
 'Corrective Action']

In [12]:
# String replacement of single quotes to blank (data has both single and double quotes)
# to resolve postgres import error
for column in columns:
    df[column] = df[column].str.replace('\'','', regex=True)

In [13]:
# First iteration of cleaned data (no null values, string replacement)
df.to_csv('./resources/recalls_dropna.csv', index=False)

### Additional Data Exploration

In [14]:
import pandas as pd
df = pd.read_csv('./resources/recalls_dropna.csv')
df.head()

Unnamed: 0,Report Received Date,NHTSA ID,Recall Link,Manufacturer,Subject,Component,Mfr Campaign Number,Recall Type,Potentially Affected,Recall Description,Consequence Summary,Corrective Action
0,01/06/2023,23V002000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Triple E Recreational Vehicles,Battery Disconnect Switch May Short,ELECTRICAL SYSTEM,CA#10253-1,Vehicle,341.0,Triple E Recreational Vehicles (Triple E) is r...,An electrical short-circuit can increase the r...,Dealers will install two rubber terminal post ...
1,01/05/2023,23V001000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Volvo Car USA, LLC",Steering Wheel May Lock Up,STEERING,R10213,Vehicle,74.0,"Volvo Car USA, LLC (Volvo Car) is recalling ce...",A screw inside the gearbox housing can cause t...,"Dealers will replace the steering gear, free o..."
2,12/29/2022,22V956000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Volkswagen Group of America, Inc.",12-Volt Battery Cable May Short Circuit,ELECTRICAL SYSTEM,97HA,Vehicle,1042.0,"Volkswagen Group of America, Inc. (Volkswagen)...",A short circuit may cause a loss of drive powe...,Owners are advised to park outside and away fr...
3,12/29/2022,22V957000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Indian Motorcycle Company,Kickstand May Not Retract Properly/FMVSS 123,STRUCTURE,I-23-01,Vehicle,4653.0,Indian Motorcycle Company (Indian) is recallin...,A kickstand that does not properly retract can...,The remedy is currently under development. Ow...
4,12/29/2022,22E102000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Hendrickson USA. L.L.C.,Loose Brake Pad Retaining Clip May Damage Tire,"SERVICE BRAKES, AIR",NR (Not Reported),Equipment,12.0,"Hendrickson USA, L.L.C. (Hendrickson) is recal...",A retaining clip or bracket that detaches can ...,All affected VanTraxx assemblies have been ins...


In [15]:
# Confirming 'NHSTA ID' column values are all unique
df['NHTSA ID'].nunique()

21594

In [16]:
# Convert 'Report Received Date' to date format
df['Report Received Date'] = pd.to_datetime(df['Report Received Date'], format='%m/%d/%Y')

df.head()

Unnamed: 0,Report Received Date,NHTSA ID,Recall Link,Manufacturer,Subject,Component,Mfr Campaign Number,Recall Type,Potentially Affected,Recall Description,Consequence Summary,Corrective Action
0,2023-01-06,23V002000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Triple E Recreational Vehicles,Battery Disconnect Switch May Short,ELECTRICAL SYSTEM,CA#10253-1,Vehicle,341.0,Triple E Recreational Vehicles (Triple E) is r...,An electrical short-circuit can increase the r...,Dealers will install two rubber terminal post ...
1,2023-01-05,23V001000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Volvo Car USA, LLC",Steering Wheel May Lock Up,STEERING,R10213,Vehicle,74.0,"Volvo Car USA, LLC (Volvo Car) is recalling ce...",A screw inside the gearbox housing can cause t...,"Dealers will replace the steering gear, free o..."
2,2022-12-29,22V956000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Volkswagen Group of America, Inc.",12-Volt Battery Cable May Short Circuit,ELECTRICAL SYSTEM,97HA,Vehicle,1042.0,"Volkswagen Group of America, Inc. (Volkswagen)...",A short circuit may cause a loss of drive powe...,Owners are advised to park outside and away fr...
3,2022-12-29,22V957000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Indian Motorcycle Company,Kickstand May Not Retract Properly/FMVSS 123,STRUCTURE,I-23-01,Vehicle,4653.0,Indian Motorcycle Company (Indian) is recallin...,A kickstand that does not properly retract can...,The remedy is currently under development. Ow...
4,2022-12-29,22E102000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Hendrickson USA. L.L.C.,Loose Brake Pad Retaining Clip May Damage Tire,"SERVICE BRAKES, AIR",NR (Not Reported),Equipment,12.0,"Hendrickson USA, L.L.C. (Hendrickson) is recal...",A retaining clip or bracket that detaches can ...,All affected VanTraxx assemblies have been ins...


In [17]:
import datetime as dt

# DF for last ten years (>=2012)
last10yr_df = df[df['Report Received Date'].dt.year >= 2012]
last10yr_df.info()
print(last10yr_df.tail())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10128 entries, 0 to 10127
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Report Received Date  10128 non-null  datetime64[ns]
 1   NHTSA ID              10128 non-null  object        
 2   Recall Link           10128 non-null  object        
 3   Manufacturer          10128 non-null  object        
 4   Subject               10128 non-null  object        
 5   Component             10128 non-null  object        
 6   Mfr Campaign Number   10128 non-null  object        
 7   Recall Type           10128 non-null  object        
 8   Potentially Affected  10128 non-null  float64       
 9   Recall Description    10128 non-null  object        
 10  Consequence Summary   10128 non-null  object        
 11  Corrective Action     10128 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(10)
memory usage: 1.0+ MB
      R

In [18]:
# Count of all values in 'Component' column
last10yr_df['Component'].value_counts().sort_index(ascending=True)

AIR BAGS                         720
BACK OVER PREVENTION              97
CHILD SEAT                        61
COMMUNICATION                      9
ELECTRICAL SYSTEM               1204
ELECTRONIC STABILITY CONTROL      53
ENGINE                            94
ENGINE AND ENGINE COOLING        363
EQUIPMENT                       1871
EQUIPMENT ADAPTIVE/MOBILITY      176
EXTERIOR LIGHTING                425
FORWARD COLLISION AVOIDANCE       20
FUEL SYSTEM, DIESEL              148
FUEL SYSTEM, GASOLINE            318
FUEL SYSTEM, OTHER                93
HYBRID PROPULSION SYSTEM          39
INTERIOR LIGHTING                  6
LANE DEPARTURE                     8
LATCHES/LOCKS/LINKAGES           177
OTHER                              8
PARKING BRAKE                    126
POWER TRAIN                      523
SEAT BELTS                       326
SEATS                            270
SERVICE BRAKES                    79
SERVICE BRAKES, AIR              237
SERVICE BRAKES, ELECTRIC          16
S

In [19]:
components_list = list(last10yr_df['Component'].unique())
components_list.sort()
components_list

['AIR BAGS',
 'BACK OVER PREVENTION',
 'CHILD SEAT',
 'COMMUNICATION',
 'ELECTRICAL SYSTEM',
 'ELECTRONIC STABILITY CONTROL',
 'ENGINE',
 'ENGINE AND ENGINE COOLING',
 'EQUIPMENT',
 'EQUIPMENT ADAPTIVE/MOBILITY',
 'EXTERIOR LIGHTING',
 'FORWARD COLLISION AVOIDANCE',
 'FUEL SYSTEM, DIESEL',
 'FUEL SYSTEM, GASOLINE',
 'FUEL SYSTEM, OTHER',
 'HYBRID PROPULSION SYSTEM',
 'INTERIOR LIGHTING',
 'LANE DEPARTURE',
 'LATCHES/LOCKS/LINKAGES',
 'OTHER',
 'PARKING BRAKE',
 'POWER TRAIN',
 'SEAT BELTS',
 'SEATS',
 'SERVICE BRAKES',
 'SERVICE BRAKES, AIR',
 'SERVICE BRAKES, ELECTRIC',
 'SERVICE BRAKES, HYDRAULIC',
 'STEERING',
 'STRUCTURE',
 'SUSPENSION',
 'TBD',
 'TIRES',
 'TRACTION CONTROL SYSTEM',
 'TRAILER HITCHES',
 'UNKNOWN OR OTHER',
 'VEHICLE SPEED CONTROL',
 'VISIBILITY',
 'VISIBILITY/WIPER',
 'WHEELS']

#### To group similar values (rename):
- 'ENGINE' and 'ENGINE AND ENGINE COOLING'?; done
- 'EQUIPMENT' and 'EQUIPMENT ADAPTIVE/MOBILITY'?; done
- 'FUEL SYSTEM, DIESEL', 'FUEL SYSTEM, GASOLINE', 'FUEL SYSTEM, OTHER'?; done
- 'SERVICE BRAKES*' columns (4); done
- 'OTHER' and 'UNKNOWN OR OTHER'?; done
- 'VISIBILITY' and 'VISIBILITY/WIPER'?; done
- 'TIRES' and 'WHEELS'? keep separate?; tires (278) are component of the wheels (113)

#### To review info and rename to other similar value:
- 'TBD'; done


In [20]:
last10yr_df[last10yr_df['Component']=='TBD']

Unnamed: 0,Report Received Date,NHTSA ID,Recall Link,Manufacturer,Subject,Component,Mfr Campaign Number,Recall Type,Potentially Affected,Recall Description,Consequence Summary,Corrective Action
6486,2016-06-02,16V397000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Tiffin Motorhomes, Inc.",Possible Steering Loss Due to Wrong Spacer,TBD,NR (Not Reported),Vehicle,539.0,Tiffin Motorhomes is recalling certain model y...,The steering wheels connection to the front ax...,"Tiffin will notify owners, and dealers will in..."


In [21]:
# Change 'TBD' value to 'STEERING'
last10yr_df.loc[last10yr_df['Component']=='TBD','Component'] = 'STEERING'
last10yr_df.iloc[6486,:]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Report Received Date                                  2016-06-02 00:00:00
NHTSA ID                                                        16V397000
Recall Link             Go to Recall (https://www.nhtsa.gov/recalls?nh...
Manufacturer                                      Tiffin Motorhomes, Inc.
Subject                        Possible Steering Loss Due to Wrong Spacer
Component                                                        STEERING
Mfr Campaign Number                                     NR (Not Reported)
Recall Type                                                       Vehicle
Potentially Affected                                                539.0
Recall Description      Tiffin Motorhomes is recalling certain model y...
Consequence Summary     The steering wheels connection to the front ax...
Corrective Action       Tiffin will notify owners, and dealers will in...
Name: 6486, dtype: object

In [22]:
# Change 'VISIBILITY/WIPER' to 'VISIBILITY'
last10yr_df.loc[last10yr_df['Component']=='VISIBILITY/WIPER', 'Component'] = 'VISIBILITY'

# Change 'OTHER' to 'UNKNOWN OR OTHER'
last10yr_df.loc[last10yr_df['Component']=='OTHER', 'Component'] = 'UNKNOWN OR OTHER'

# Change 'SERVICE BRAKES*' values to 'SERVICE BRAKES'
last10yr_df.loc[last10yr_df['Component']=='SERVICE BRAKES, AIR', 'Component'] = 'SERVICE BRAKES'
last10yr_df.loc[last10yr_df['Component']=='SERVICE BRAKES, ELECTRIC', 'Component'] = 'SERVICE BRAKES'
last10yr_df.loc[last10yr_df['Component']=='SERVICE BRAKES, HYDRAULIC', 'Component'] = 'SERVICE BRAKES'

# Change 'FUEL SYSTEM*' values to 'FUEL SYSTEM'
last10yr_df.loc[last10yr_df['Component']=='FUEL SYSTEM, DIESEL', 'Component'] = 'FUEL SYSTEM'
last10yr_df.loc[last10yr_df['Component']=='FUEL SYSTEM, GASOLINE', 'Component'] = 'FUEL SYSTEM'
last10yr_df.loc[last10yr_df['Component']=='FUEL SYSTEM, OTHER', 'Component'] = 'FUEL SYSTEM'

# Change 'ENGINE AND ENGINE COOLING' to 'ENGINE'
last10yr_df.loc[last10yr_df['Component']=='ENGINE AND ENGINE COOLING', 'Component'] = 'ENGINE'

# Change 'EQUIPMENT ADAPTIVE/MOBILITY' to 'EQUIPMENT'
last10yr_df.loc[last10yr_df['Component']=='EQUIPMENT ADAPTIVE/MOBILITY', 'Component'] = 'EQUIPMENT'

In [23]:
components_list = list(last10yr_df['Component'].unique())
components_list.sort()
components_list

['AIR BAGS',
 'BACK OVER PREVENTION',
 'CHILD SEAT',
 'COMMUNICATION',
 'ELECTRICAL SYSTEM',
 'ELECTRONIC STABILITY CONTROL',
 'ENGINE',
 'EQUIPMENT',
 'EXTERIOR LIGHTING',
 'FORWARD COLLISION AVOIDANCE',
 'FUEL SYSTEM',
 'HYBRID PROPULSION SYSTEM',
 'INTERIOR LIGHTING',
 'LANE DEPARTURE',
 'LATCHES/LOCKS/LINKAGES',
 'PARKING BRAKE',
 'POWER TRAIN',
 'SEAT BELTS',
 'SEATS',
 'SERVICE BRAKES',
 'STEERING',
 'STRUCTURE',
 'SUSPENSION',
 'TIRES',
 'TRACTION CONTROL SYSTEM',
 'TRAILER HITCHES',
 'UNKNOWN OR OTHER',
 'VEHICLE SPEED CONTROL',
 'VISIBILITY',
 'WHEELS']

In [24]:
# Count of all values in 'Component' column (cleaned)
last10yr_df['Component'].value_counts().sort_index(ascending=True)

AIR BAGS                         720
BACK OVER PREVENTION              97
CHILD SEAT                        61
COMMUNICATION                      9
ELECTRICAL SYSTEM               1204
ELECTRONIC STABILITY CONTROL      53
ENGINE                           457
EQUIPMENT                       2047
EXTERIOR LIGHTING                425
FORWARD COLLISION AVOIDANCE       20
FUEL SYSTEM                      559
HYBRID PROPULSION SYSTEM          39
INTERIOR LIGHTING                  6
LANE DEPARTURE                     8
LATCHES/LOCKS/LINKAGES           177
PARKING BRAKE                    126
POWER TRAIN                      523
SEAT BELTS                       326
SEATS                            270
SERVICE BRAKES                   702
STEERING                         499
STRUCTURE                        505
SUSPENSION                       515
TIRES                            278
TRACTION CONTROL SYSTEM            2
TRAILER HITCHES                   77
UNKNOWN OR OTHER                  21
V

In [31]:
# data frame after dropping Recall link column.
last10yr_df_cleaned = last10yr_df.drop(['Recall Link'], axis = 1)
last10yr_df_cleaned

Unnamed: 0,Report Received Date,NHTSA ID,Manufacturer,Subject,Component,Mfr Campaign Number,Recall Type,Potentially Affected,Recall Description,Consequence Summary,Corrective Action
0,2023-01-06,23V002000,Triple E Recreational Vehicles,Battery Disconnect Switch May Short,ELECTRICAL SYSTEM,CA#10253-1,Vehicle,341.0,Triple E Recreational Vehicles (Triple E) is r...,An electrical short-circuit can increase the r...,Dealers will install two rubber terminal post ...
1,2023-01-05,23V001000,"Volvo Car USA, LLC",Steering Wheel May Lock Up,STEERING,R10213,Vehicle,74.0,"Volvo Car USA, LLC (Volvo Car) is recalling ce...",A screw inside the gearbox housing can cause t...,"Dealers will replace the steering gear, free o..."
2,2022-12-29,22V956000,"Volkswagen Group of America, Inc.",12-Volt Battery Cable May Short Circuit,ELECTRICAL SYSTEM,97HA,Vehicle,1042.0,"Volkswagen Group of America, Inc. (Volkswagen)...",A short circuit may cause a loss of drive powe...,Owners are advised to park outside and away fr...
3,2022-12-29,22V957000,Indian Motorcycle Company,Kickstand May Not Retract Properly/FMVSS 123,STRUCTURE,I-23-01,Vehicle,4653.0,Indian Motorcycle Company (Indian) is recallin...,A kickstand that does not properly retract can...,The remedy is currently under development. Ow...
4,2022-12-29,22E102000,Hendrickson USA. L.L.C.,Loose Brake Pad Retaining Clip May Damage Tire,SERVICE BRAKES,NR (Not Reported),Equipment,12.0,"Hendrickson USA, L.L.C. (Hendrickson) is recal...",A retaining clip or bracket that detaches can ...,All affected VanTraxx assemblies have been ins...
...,...,...,...,...,...,...,...,...,...,...,...
10123,2012-01-06,12V004000,"Chrysler (FCA US, LLC)",TIRE PRESSURE MONITOR SYSTEM/FMVSS 138,TIRES,L43,Vehicle,303.0,CHRYSLER IS RECALLING CERTAIN MODEL YEAR 2012 ...,UNDERINFLATED TIRES CAN RESULT IN TIRE OVERLOA...,CHRYSLER WILL NOTIFY OWNERS AND DEALERS WILL R...
10124,2012-01-04,11E057000,DEXTER AXLE,E/H BRAKE ACTUATOR,EQUIPMENT,NR (Not Reported),Equipment,1713.0,DEXTER AXLE IS RECALLING CERTAIN ELECTRIC/HYDR...,THE E/H BRAKE ACTUATOR PROVIDES BRAKING POWER ...,DEXTER AXLE IS WORKING WITH THE INSTALLERS AND...
10125,2012-01-04,12V002000,"New Flyer of America, Inc.",INSTRUMENT CLUSTER SOFTWARE/PARKER,ELECTRICAL SYSTEM,R11-026,Vehicle,490.0,NEW FLYER IS RECALLING CERTAIN MODEL YEARS 200...,ENGINE SHUTDOWN WOULD RESULT IN LOSS OF VEHICL...,NEW FLYER WILL REPROGRAM THE INSTRUMENT PANEL ...
10126,2012-01-03,11V606000,Utilimaster Corporation,FMVSS 206/DOOR CLOSURE WARNING SYSTEM,LATCHES/LOCKS/LINKAGES,NR (Not Reported),Vehicle,9861.0,Certain Utilimaster model year 2009 through 20...,Certain work trucks are not equipped with eith...,"Utilimaster will notify owners, and dealers wi..."


In [33]:
last10yr_df_cleaned['Subject'].nunique()

7144

In [34]:
# shrunken data to last 10 year and cleaned 
last10yr_df_cleaned.to_csv('./resources/last10yr_df_cleaned.csv', index=False)