# Medical Supplier Data Preprocessing Notebook: Main Dataset

This notebook walks through a routine preprocessing workflow on a real-world dataset of medical equipment suppliers. It is designed to showcase a structured, careful data-handling process aligned with office data-maintenance responsibilities.

## Dataset Loading

In [2]:
import pandas as pd

# Read the complete supplier CSV in a single pass (low_memory=False) and
# show its (rows, columns) shape as the cell output.
df = pd.read_csv(
    "../data/raw/Medical-Equipment-Suppliers.csv",
    low_memory=False,
)
df.shape

(63112, 17)

## Initial Inspection

In [3]:
# Print a structural summary of the frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63112 entries, 0 to 63111
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   provider_id             63112 non-null  int64  
 1   acceptsassignement      63112 non-null  bool   
 2   participationbegindate  63112 non-null  object 
 3   businessname            63112 non-null  object 
 4   practicename            63112 non-null  object 
 5   practiceaddress1        63112 non-null  object 
 6   practiceaddress2        13087 non-null  object 
 7   practicecity            63112 non-null  object 
 8   practicestate           63112 non-null  object 
 9   practicezip9code        63112 non-null  int64  
 10  telephonenumber         63112 non-null  int64  
 11  specialitieslist        62423 non-null  object 
 12  providertypelist        7185 non-null   object 
 13  supplieslist            63064 non-null  object 
 14  latitude                63111 non-null

In [4]:
# Show the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,provider_id,acceptsassignement,participationbegindate,businessname,practicename,practiceaddress1,practiceaddress2,practicecity,practicestate,practicezip9code,telephonenumber,specialitieslist,providertypelist,supplieslist,latitude,longitude,is_contracted_for_cba
0,20561045,True,2007-11-01,HARTIG DRUG CO CORP,HARTIG DRUG STORE 5,11002 BARTELL BLVD,,GALENA,IL,610368215,8157772700,Pharmacy,,Enteral Nutrients|Walkers|Tracheotomy Supplies...,42.44484,-90.45593,False
1,20489003,False,2014-07-01,WAL-MART STORES EAST LP,WALMART PHARMACY 10-0674,1112 NASHVILLE PIKE,,GALLATIN,TN,370667116,6154521110,Optician|Pharmacy,,Enteral Nutrients|Oral Antiemetic Drugs|Blood ...,36.37498,-86.47526,False
2,20447711,False,2019-01-01,OHIO CVS STORES LLC,CVS PHARMACY #03306,1949 WEST MARKET STREET,,AKRON,OH,443136910,3308675410,Pharmacy,,Blood Glucose Monitors/Supplies (Mail Order)|P...,41.11353,-81.5782,False
3,20541431,False,2010-03-31,LUXOTTICA OF AMERICA INC,LENSCRAFERS #00855,2526 S THIRD ST,,JACKSONVILLE,FL,322506024,9042472374,Optician,,Prosthetic Lenses: Conventional Eyeglasses,30.26696,-81.38857,False
4,20469089,True,2016-12-15,SPECIALTY INFUSIONS INC,PRIME INFUSIONS,1624 BROADWAY,,BROOKLYN,NY,112071026,7184434000,Pharmacy,,External Infusion Pumps and/or Supplies|Osteog...,40.68483,-73.91404,False


## Null Value Summary

In [5]:
# Count missing values per column and list the most incomplete columns first.
(
    df.isna()
    .sum()
    .sort_values(ascending=False)
)

providertypelist          55927
practiceaddress2          50025
specialitieslist            689
supplieslist                 48
latitude                      1
longitude                     1
provider_id                   0
businessname                  0
acceptsassignement            0
practicestate                 0
practicecity                  0
practiceaddress1              0
practicename                  0
participationbegindate        0
telephonenumber               0
practicezip9code              0
is_contracted_for_cba         0
dtype: int64

## Unique Value Counts per Column

In [6]:
# Number of distinct non-null values per column, lowest cardinality first.
(
    df.nunique(dropna=True)
    .sort_values(ascending=True)
)

acceptsassignement            2
is_contracted_for_cba         2
practicestate                55
providertypelist            308
specialitieslist            707
practiceaddress2           3312
participationbegindate     6001
practicecity               7130
supplieslist              15324
businessname              19466
practicename              54388
latitude                  58712
longitude                 59361
practicezip9code          60834
practiceaddress1          61566
telephonenumber           62258
provider_id               63112
dtype: int64

## Standardize `practicestate` column

In [7]:
# Normalise state codes: upper-case them, then drop surrounding whitespace.
df["practicestate"] = (
    df["practicestate"]
    .str.upper()
    .str.strip()
)

## Fill null values and clean the `businessname` column

In [8]:
# Replace missing business names with the placeholder "Missing" in the raw column.
df["businessname"] = df["businessname"].fillna(value="Missing")

# Derive a display-friendly copy: title-cased, with surrounding whitespace removed.
df["businessname_clean"] = (
    df["businessname"]
    .str.title()
    .str.strip()
)

## Format `telephonenumber`

In [9]:
# Store phone numbers as text, left-padded with zeros to at least 10 characters.
df["telephonenumber"] = (
    df["telephonenumber"]
    .astype(str)
    .str.zfill(width=10)
)

## Parse `participationbegindate` to a datetime format

In [10]:
# Convert the participation start date to datetime64; values that cannot be
# parsed become NaT (errors="coerce") instead of raising.
df["participationbegindate"] = pd.to_datetime(
    arg=df["participationbegindate"],
    errors="coerce",
)

## Clean `specialitieslist` and `supplieslist`

In [11]:
# Apply the same text normalisation to both list columns:
# title-case the text, then strip surrounding whitespace.
for list_col in ("specialitieslist", "supplieslist"):
    df[list_col] = df[list_col].str.title().str.strip()

## Format Zip Codes (9 digits)

In [12]:
# Store ZIP codes as zero-padded text so leading zeros lost by the integer
# dtype are restored.
#
# A value with five or fewer digits cannot be a ZIP+4 (that would imply a ZIP
# starting with "0000"), so it is treated as a plain 5-digit ZIP and padded to
# width 5. Padding it to 9 would produce an invalid code such as "000061036".
# Longer values are ZIP+4 codes and are padded to width 9 as before.
# The cell is safe to re-run: already formatted values are left unchanged.
df['practicezip9code'] = (
    df['practicezip9code']
    .astype(str)
    .pipe(lambda z: z.str.zfill(5).where(z.str.len() <= 5, z.str.zfill(9)))
)

## Preview Cleaned Data

In [13]:
# Show the first five rows after the cleaning steps above.
df.head(n=5)

Unnamed: 0,provider_id,acceptsassignement,participationbegindate,businessname,practicename,practiceaddress1,practiceaddress2,practicecity,practicestate,practicezip9code,telephonenumber,specialitieslist,providertypelist,supplieslist,latitude,longitude,is_contracted_for_cba,businessname_clean
0,20561045,True,2007-11-01,HARTIG DRUG CO CORP,HARTIG DRUG STORE 5,11002 BARTELL BLVD,,GALENA,IL,610368215,8157772700,Pharmacy,,Enteral Nutrients|Walkers|Tracheotomy Supplies...,42.44484,-90.45593,False,Hartig Drug Co Corp
1,20489003,False,2014-07-01,WAL-MART STORES EAST LP,WALMART PHARMACY 10-0674,1112 NASHVILLE PIKE,,GALLATIN,TN,370667116,6154521110,Optician|Pharmacy,,Enteral Nutrients|Oral Antiemetic Drugs|Blood ...,36.37498,-86.47526,False,Wal-Mart Stores East Lp
2,20447711,False,2019-01-01,OHIO CVS STORES LLC,CVS PHARMACY #03306,1949 WEST MARKET STREET,,AKRON,OH,443136910,3308675410,Pharmacy,,Blood Glucose Monitors/Supplies (Mail Order)|P...,41.11353,-81.5782,False,Ohio Cvs Stores Llc
3,20541431,False,2010-03-31,LUXOTTICA OF AMERICA INC,LENSCRAFERS #00855,2526 S THIRD ST,,JACKSONVILLE,FL,322506024,9042472374,Optician,,Prosthetic Lenses: Conventional Eyeglasses,30.26696,-81.38857,False,Luxottica Of America Inc
4,20469089,True,2016-12-15,SPECIALTY INFUSIONS INC,PRIME INFUSIONS,1624 BROADWAY,,BROOKLYN,NY,112071026,7184434000,Pharmacy,,External Infusion Pumps And/Or Supplies|Osteog...,40.68483,-73.91404,False,Specialty Infusions Inc
