In [1]:
# Importing dependencies
import pandas as pd;

## Measurement types:
- ***Accelerometer:*** An accelerometer is a sensor that measures acceleration or the rate of change of velocity. In a smartwatch, the accelerometer is used to track movement and activity, such as steps taken, distance travelled, and calories burned. It also enables features such as wrist gestures and shake-to-wake.<br><br/>
- ***Gyroscope:*** A gyroscope is a sensor that measures orientation and angular velocity (the rate of rotation). In a smartwatch, it detects rotation of the wrist: when you turn your wrist to check your watch, the gyroscope registers the movement in a split second.<br><br/>
- ***Magnetometer:*** A magnetometer is a sensor that measures magnetic field strength. In smartwatches, it is also referred to as a compass sensor. It is used to keep track of the user's motion and the direction in which the user is pointing.<br><br/>
- ***Barometer:*** A barometer is a sensor that measures atmospheric pressure. The barometer on a watch automatically measures the altitude and atmospheric pressure at the current location, and the watch shows all readings from the current day as a waveform graph.<br><br/>
- ***Heart Rate:*** Photoplethysmography (PPG) is the most commonly used method of measuring heart rate in heart rate trackers. As your heart beats, the volume of blood changes throughout your cardiovascular system, including in the small vessels of the skin such as veins and arteries; a PPG sensor detects these changes to derive the heart rate.<br><br/>




In [2]:
# Load the raw device dataset from its CSV file into a DataFrame
csv_path = "Project_3_Room_5/proj3data.csv"
tracker_data = pd.read_csv(filepath_or_buffer=csv_path)

# Preview the first five rows
tracker_data.head(n=5)

Unnamed: 0,Company name,Device name,Crowd funded,County of origin,Region,Release year,Form factor,Accelerometer,Gyroscope,Magnetometer,Barometer,GPS,PPG
0,Actofit,Fitness Tracker,True,India,Asia-Pacific,2017.0,tracker,True,True,True,False,False,True
1,Adidas,MiCoach Fit Smart,False,Germany,Europe,2014.0,tracker,True,False,False,False,False,True
2,Adidas,MiCoach Smart Run,False,Germany,Europe,2013.0,watch,True,False,False,False,True,True
3,Airon,105,False,Norway,Europe,2014.0,tracker,True,False,False,False,False,False
4,Alcatel,Onetouch GO Watch,False,China,Asia-Pacific,2015.0,watch,True,True,True,False,False,True


In [3]:
# Inspect the raw data: column names, non-null counts, and dtypes
tracker_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 423 entries, 0 to 422
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Company name      423 non-null    object 
 1   Device name       423 non-null    object 
 2   Crowd funded      423 non-null    bool   
 3   County of origin  422 non-null    object 
 4   Region            422 non-null    object 
 5   Release year      406 non-null    float64
 6   Form factor       423 non-null    object 
 7   Accelerometer     423 non-null    bool   
 8   Gyroscope         423 non-null    bool   
 9   Magnetometer      423 non-null    bool   
 10  Barometer         423 non-null    bool   
 11  GPS               423 non-null    bool   
 12  PPG               423 non-null    bool   
dtypes: bool(7), float64(1), object(5)
memory usage: 22.8+ KB


In [4]:
# Count the non-null entries in each column; any column whose count is
# below the total number of rows contains missing (NA) values.
tracker_data.notna().sum()

Company name        423
Device name         423
Crowd funded        423
County of origin    422
Region              422
Release year        406
Form factor         423
Accelerometer       423
Gyroscope           423
Magnetometer        423
Barometer           423
GPS                 423
PPG                 423
dtype: int64

In [5]:
# Drop every row that has a missing value in at least one column
tracker_data = tracker_data.dropna(axis=0, how="any")

In [6]:
# Verify that dropna worked: every column should now report the same non-null count
tracker_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 406 entries, 0 to 422
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Company name      406 non-null    object 
 1   Device name       406 non-null    object 
 2   Crowd funded      406 non-null    bool   
 3   County of origin  406 non-null    object 
 4   Region            406 non-null    object 
 5   Release year      406 non-null    float64
 6   Form factor       406 non-null    object 
 7   Accelerometer     406 non-null    bool   
 8   Gyroscope         406 non-null    bool   
 9   Magnetometer      406 non-null    bool   
 10  Barometer         406 non-null    bool   
 11  GPS               406 non-null    bool   
 12  PPG               406 non-null    bool   
dtypes: bool(7), float64(1), object(5)
memory usage: 25.0+ KB


In [7]:
# Review the current column headers (and first five rows) before renaming
tracker_data.head()

Unnamed: 0,Company name,Device name,Crowd funded,County of origin,Region,Release year,Form factor,Accelerometer,Gyroscope,Magnetometer,Barometer,GPS,PPG
0,Actofit,Fitness Tracker,True,India,Asia-Pacific,2017.0,tracker,True,True,True,False,False,True
1,Adidas,MiCoach Fit Smart,False,Germany,Europe,2014.0,tracker,True,False,False,False,False,True
2,Adidas,MiCoach Smart Run,False,Germany,Europe,2013.0,watch,True,False,False,False,True,True
3,Airon,105,False,Norway,Europe,2014.0,tracker,True,False,False,False,False,False
4,Alcatel,Onetouch GO Watch,False,China,Asia-Pacific,2015.0,watch,True,True,True,False,False,True


In [10]:
# Give the Company, Device, Country, Type, and Heart Rate columns cleaner names.
# The original header "County of origin" is a typo for "Country of origin",
# and "PPG" is relabelled "Heart Rate".
renamed_tracker_data = tracker_data.rename(
    columns={
        "Company name": "Company",
        "Device name": "Device",
        "County of origin": "Country of origin",
        "PPG": "Heart Rate",
        "Form factor": "Type",
    }
)
renamed_tracker_data.head()


Unnamed: 0,Company,Device,Crowd funded,Country of origin,Region,Release year,Type,Accelerometer,Gyroscope,Magnetometer,Barometer,GPS,Heart Rate
0,Actofit,Fitness Tracker,True,India,Asia-Pacific,2017.0,tracker,True,True,True,False,False,True
1,Adidas,MiCoach Fit Smart,False,Germany,Europe,2014.0,tracker,True,False,False,False,False,True
2,Adidas,MiCoach Smart Run,False,Germany,Europe,2013.0,watch,True,False,False,False,True,True
3,Airon,105,False,Norway,Europe,2014.0,tracker,True,False,False,False,False,False
4,Alcatel,Onetouch GO Watch,False,China,Asia-Pacific,2015.0,watch,True,True,True,False,False,True


In [12]:
# List the distinct company names to validate them and spot inconsistent entries
# (e.g. the output shows both 'Garmin' and 'Garmin ', differing only by a trailing space)
renamed_tracker_data["Company"].unique()


array(['Actofit', 'Adidas', 'Airon', 'Alcatel', 'Allview',
       'Ambit Networks', 'Amiigo', 'Apple', 'ASUS', 'Atlas', 'Awatch',
       'Basis', 'Beurer', 'BIA', 'Bluboo', 'Burg', 'CarePredict', 'Casio',
       'Cellular ', 'Cogito', 'Cookoo', 'Denver', 'Elephone', 'Empatica',
       'Emporio Armani', 'Epson', 'Fitbit', 'Fitbug', 'Forever', 'Fossil',
       'Garmin', 'Garmin ', 'Goji', 'GOQii', 'Haier', 'Hannspree',
       'HealBe', 'Huawei', 'I.am', 'iFit', 'iHealth', "I'm", 'Imco',
       'InBody', 'Intex', 'iRadish ', 'Jawbone', 'Jaybird', 'Jumpy', 'K8',
       'Kate Spade', 'König', 'LeapFrog', 'Lefun', 'Lenovo', 'LG',
       'LifeTrak', 'Martian', 'Meta Watch', 'Michael Kors ', 'Microsoft',
       'Mio', 'Misfit', 'Mobile Action', 'Mobvoi', 'Mondaine',
       'Montblanc', 'Moov', 'Motorola', 'Movado', 'Movband:', 'Movo',
       'MyKronoz', 'Neptune', 'Nevo', 'New Balance ', 'Nike', 'Nixon',
       'No. 1', 'Nymi', 'Olio', 'Omate', 'Omron', 'O-Synce', 'Oumax',
       'Pebble', 'Ph

In [13]:
# Check the dtype and non-null count of each column after renaming
renamed_tracker_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 406 entries, 0 to 422
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Company            406 non-null    object 
 1   Device             406 non-null    object 
 2   Crowd funded       406 non-null    bool   
 3   Country of origin  406 non-null    object 
 4   Region             406 non-null    object 
 5   Release year       406 non-null    float64
 6   Type               406 non-null    object 
 7   Accelerometer      406 non-null    bool   
 8   Gyroscope          406 non-null    bool   
 9   Magnetometer       406 non-null    bool   
 10  Barometer          406 non-null    bool   
 11  GPS                406 non-null    bool   
 12  Heart Rate         406 non-null    bool   
dtypes: bool(7), float64(1), object(5)
memory usage: 25.0+ KB


In [19]:
# Convert Release year to integers (it was loaded as float) and convert
# Company, Device, Country of origin, and Type to pandas' "string" dtype.
renamed_tracker_data["Release year"] = renamed_tracker_data["Release year"].astype(int)

# Strip leading/trailing whitespace while converting the text columns: the raw
# data contains values such as 'Garmin ' and 'Cellular ' that would otherwise be
# treated as different companies from 'Garmin' / 'Cellular' in later analysis.
text_columns = ["Company", "Device", "Country of origin", "Type"]
for text_column in text_columns:
    renamed_tracker_data[text_column] = (
        renamed_tracker_data[text_column].astype("string").str.strip()
    )

# Confirm the new dtypes
renamed_tracker_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 406 entries, 0 to 422
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Company            406 non-null    string
 1   Device             406 non-null    string
 2   Crowd funded       406 non-null    bool  
 3   Country of origin  406 non-null    string
 4   Region             406 non-null    object
 5   Release year       406 non-null    int32 
 6   Type               406 non-null    string
 7   Accelerometer      406 non-null    bool  
 8   Gyroscope          406 non-null    bool  
 9   Magnetometer       406 non-null    bool  
 10  Barometer          406 non-null    bool  
 11  GPS                406 non-null    bool  
 12  Heart Rate         406 non-null    bool  
dtypes: bool(7), int32(1), object(1), string(4)
memory usage: 23.4+ KB


In [34]:
# Replace the company listed as "No. 1" with its proper company name, "DT No.1"
renamed_tracker_data["Company"] = renamed_tracker_data["Company"].replace(
    to_replace="No. 1",
    value="DT No.1",
)

In [35]:
# Verify the name change by showing the first rows whose Company is now "DT No.1"
is_dt_no1 = renamed_tracker_data["Company"] == "DT No.1"
renamed_tracker_data.loc[is_dt_no1].head()

Unnamed: 0,Company,Device,Crowd funded,Country of origin,Region,Release year,Type,Accelerometer,Gyroscope,Magnetometer,Barometer,GPS,Heart Rate
248,DT No.1,D3,False,China,Asia-Pacific,2015,watch,True,False,False,False,False,True
249,DT No.1,D5,False,China,Asia-Pacific,2015,watch,True,False,False,False,True,True
250,DT No.1,D5+,False,China,Asia-Pacific,2016,watch,True,True,False,True,True,True
251,DT No.1,D6,False,China,Asia-Pacific,2016,watch,True,False,False,True,True,True
252,DT No.1,D7,False,China,Asia-Pacific,2017,watch,True,False,False,True,True,True


In [37]:
# Write the cleaned DataFrame to a CSV file, keeping the row index as the first column
renamed_tracker_data.to_csv(path_or_buf="cleaned_tracker_data.csv", index=True)