# Tobacco Use Over Time

In [None]:
# Import dependencies
import pandas as pd
import requests
import matplotlib.pyplot as plt

In [3]:
# Load the cleaned tobacco data CSV from the Resources folder
tobacco_data = pd.read_csv(filepath_or_buffer="./Resources/Clean_Tobacco_data.csv")

# Preview the first rows
tobacco_data.head()

Unnamed: 0.1,Unnamed: 0,YEAR,LocationDesc,TopicDesc,MeasureDesc,Response,Data_Value,Sample_Size,Gender,Race,Age
0,0,2019,National Median (States and DC),Cessation (Adults),Percent of Former Smokers Among Ever Smokers,,60.8,,Overall,All Races,All Ages
1,1,2019,New Hampshire,Cigarette Use (Adults),Smoking Frequency,Every Day,76.3,357.0,Female,All Races,All Ages
2,2,2019,Florida,Cigarette Use (Adults),Current Smoking,,14.8,15824.0,Overall,All Races,All Ages
3,3,2019,Hawaii,Smokeless Tobacco Use (Adults),Current Use,,2.1,459.0,Overall,All Races,18 to 24 Years
4,4,2019,Alabama,Smokeless Tobacco Use (Adults),User Status,Not Current,88.6,2729.0,Male,All Races,All Ages


In [4]:
# Keep only the named data columns, which drops "Unnamed: 0.1" and "Unnamed: 0".
# .copy() makes the result an independent frame, so assigning to one of its
# columns in a later cell does not raise pandas' SettingWithCopyWarning.
tobacco_data = tobacco_data[["YEAR", "LocationDesc", "TopicDesc", "MeasureDesc", "Response", "Data_Value", 
                            "Sample_Size", "Gender", "Race", "Age"]].copy()

In [5]:
# Change LocationDesc "National Median (States and DC)" value to "U.S."
# .assign returns a new frame instead of setting a column on a frame that was
# produced by column selection, which avoids pandas' SettingWithCopyWarning.
tobacco_data = tobacco_data.assign(
    LocationDesc=tobacco_data["LocationDesc"].replace(["National Median (States and DC)"], "U.S.")
)

tobacco_data.head()

Unnamed: 0,YEAR,LocationDesc,TopicDesc,MeasureDesc,Response,Data_Value,Sample_Size,Gender,Race,Age
0,2019,U.S.,Cessation (Adults),Percent of Former Smokers Among Ever Smokers,,60.8,,Overall,All Races,All Ages
1,2019,New Hampshire,Cigarette Use (Adults),Smoking Frequency,Every Day,76.3,357.0,Female,All Races,All Ages
2,2019,Florida,Cigarette Use (Adults),Current Smoking,,14.8,15824.0,Overall,All Races,All Ages
3,2019,Hawaii,Smokeless Tobacco Use (Adults),Current Use,,2.1,459.0,Overall,All Races,18 to 24 Years
4,2019,Alabama,Smokeless Tobacco Use (Adults),User Status,Not Current,88.6,2729.0,Male,All Races,All Ages


In [6]:
# Keep only the rows whose LocationDesc is "U.S."

us_data = tobacco_data[tobacco_data["LocationDesc"].eq("U.S.")]

# Preview the first rows
us_data.head()

Unnamed: 0,YEAR,LocationDesc,TopicDesc,MeasureDesc,Response,Data_Value,Sample_Size,Gender,Race,Age
0,2019,U.S.,Cessation (Adults),Percent of Former Smokers Among Ever Smokers,,60.8,,Overall,All Races,All Ages
471,2019,U.S.,Cigarette Use (Adults),Smoking Status,Never,58.6,,Overall,All Races,All Ages
1429,2019,U.S.,Cigarette Use (Adults),Smoking Status,Former,25.2,,Overall,All Races,All Ages
1960,2019,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.2,,Overall,All Races,All Ages
2217,2019,U.S.,Cigarette Use (Adults),Smoking Status,Current,16.0,,Overall,All Races,All Ages


In [81]:
# Keep the U.S. rows for cigarette use where the response is "Current"

cigarette_df = us_data[
    us_data["TopicDesc"].eq("Cigarette Use (Adults)") & us_data["Response"].eq("Current")
]

cigarette_df

Unnamed: 0,YEAR,LocationDesc,TopicDesc,MeasureDesc,Response,Data_Value,Sample_Size,Gender,Race,Age
2217,2019,U.S.,Cigarette Use (Adults),Smoking Status,Current,16.0,,Overall,All Races,All Ages
4558,2017,U.S.,Cigarette Use (Adults),Smoking Status,Current,17.1,,Overall,All Races,All Ages
9340,2018,U.S.,Cigarette Use (Adults),Smoking Status,Current,16.1,,Overall,All Races,All Ages
15071,2014,U.S.,Cigarette Use (Adults),Smoking Status,Current,18.1,,Overall,All Races,All Ages
16119,2016,U.S.,Cigarette Use (Adults),Smoking Status,Current,17.1,,Overall,All Races,All Ages
26084,2013,U.S.,Cigarette Use (Adults),Smoking Status,Current,19.0,,Overall,All Races,All Ages
27414,2015,U.S.,Cigarette Use (Adults),Smoking Status,Current,17.5,,Overall,All Races,All Ages
28569,2012,U.S.,Cigarette Use (Adults),Smoking Status,Current,19.6,,Overall,All Races,All Ages
29643,2011,U.S.,Cigarette Use (Adults),Smoking Status,Current,21.2,,Overall,All Races,All Ages


In [82]:
# Order the cigarette rows chronologically (ascending YEAR)

cigarette2_df = cigarette_df.sort_values("YEAR", ascending=True)

cigarette2_df

Unnamed: 0,YEAR,LocationDesc,TopicDesc,MeasureDesc,Response,Data_Value,Sample_Size,Gender,Race,Age
29643,2011,U.S.,Cigarette Use (Adults),Smoking Status,Current,21.2,,Overall,All Races,All Ages
28569,2012,U.S.,Cigarette Use (Adults),Smoking Status,Current,19.6,,Overall,All Races,All Ages
26084,2013,U.S.,Cigarette Use (Adults),Smoking Status,Current,19.0,,Overall,All Races,All Ages
15071,2014,U.S.,Cigarette Use (Adults),Smoking Status,Current,18.1,,Overall,All Races,All Ages
27414,2015,U.S.,Cigarette Use (Adults),Smoking Status,Current,17.5,,Overall,All Races,All Ages
16119,2016,U.S.,Cigarette Use (Adults),Smoking Status,Current,17.1,,Overall,All Races,All Ages
4558,2017,U.S.,Cigarette Use (Adults),Smoking Status,Current,17.1,,Overall,All Races,All Ages
9340,2018,U.S.,Cigarette Use (Adults),Smoking Status,Current,16.1,,Overall,All Races,All Ages
2217,2019,U.S.,Cigarette Use (Adults),Smoking Status,Current,16.0,,Overall,All Races,All Ages


In [83]:
# Select matplotlib's interactive "notebook" backend for the figures in the cells below
%matplotlib notebook 

In [84]:
# Plot year and current use percentage

x_axis = cigarette2_df["YEAR"]

y_axis = cigarette2_df["Data_Value"]

# clear=True wipes figure 1 if it already exists, so re-running this cell
# redraws the chart instead of stacking a second line and label set on it.
fig, ax = plt.subplots(num=1, clear=True)
ax.plot(x_axis, y_axis, marker="o")

ax.set_xlabel("Year")
ax.set_ylabel("Current Cigarette Users (Median %)")
ax.set_title("Current Cigarette Users from 2011-2019")

# Label every point with its value, read from the data rather than typed by
# hand; the offset (in points) keeps the text clear of the marker.
for year, value in zip(x_axis, y_axis):
    ax.annotate(f"{value:.1f}", (year, value), xytext=(5, 5), textcoords="offset points")

# Extra horizontal margin so the label of the last year stays inside the axes
ax.margins(x=0.1)

plt.show()

<IPython.core.display.Javascript object>

In [85]:
# Keep the U.S. rows for smokeless tobacco use where the response is "Current"

smokeless_df = us_data[
    us_data["TopicDesc"].eq("Smokeless Tobacco Use (Adults)") & us_data["Response"].eq("Current")
]

smokeless_df

Unnamed: 0,YEAR,LocationDesc,TopicDesc,MeasureDesc,Response,Data_Value,Sample_Size,Gender,Race,Age
1960,2019,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.2,,Overall,All Races,All Ages
6962,2018,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.2,,Overall,All Races,All Ages
8705,2017,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.0,,Overall,All Races,All Ages
20980,2014,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.2,,Overall,All Races,All Ages
21934,2012,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.1,,Overall,All Races,All Ages
23067,2015,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.0,,Overall,All Races,All Ages
25356,2013,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.3,,Overall,All Races,All Ages
25504,2016,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.0,,Overall,All Races,All Ages
30545,2011,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.4,,Overall,All Races,All Ages


In [86]:
# Order the smokeless tobacco rows chronologically (ascending YEAR)

smokeless2_df = smokeless_df.sort_values("YEAR", ascending=True)

smokeless2_df

Unnamed: 0,YEAR,LocationDesc,TopicDesc,MeasureDesc,Response,Data_Value,Sample_Size,Gender,Race,Age
30545,2011,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.4,,Overall,All Races,All Ages
21934,2012,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.1,,Overall,All Races,All Ages
25356,2013,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.3,,Overall,All Races,All Ages
20980,2014,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.2,,Overall,All Races,All Ages
23067,2015,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.0,,Overall,All Races,All Ages
25504,2016,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.0,,Overall,All Races,All Ages
8705,2017,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.0,,Overall,All Races,All Ages
6962,2018,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.2,,Overall,All Races,All Ages
1960,2019,U.S.,Smokeless Tobacco Use (Adults),User Status,Current,4.2,,Overall,All Races,All Ages


In [87]:
# Plot year and current use percentage

x_axis = smokeless2_df["YEAR"]

y_axis = smokeless2_df["Data_Value"]

# clear=True wipes figure 2 if it already exists, so re-running this cell
# redraws the chart instead of stacking a second line and label set on it.
fig, ax = plt.subplots(num=2, clear=True)
ax.plot(x_axis, y_axis, marker="^", color="red")

ax.set_xlabel("Year")
ax.set_ylabel("Current Smokeless Tobacco Users (Median %)")
ax.set_title("Current Smokeless Tobacco Users from 2011-2019")

# Label every point with its value, read from the data rather than typed by
# hand; the offset (in points) keeps the text clear of the marker.
for year, value in zip(x_axis, y_axis):
    ax.annotate(f"{value:.1f}", (year, value), xytext=(5, 5), textcoords="offset points")

# Extra horizontal margin so the label of the last year stays inside the axes
ax.margins(x=0.1)

plt.show()

<IPython.core.display.Javascript object>

In [88]:
# Keep the U.S. rows for e-cigarette use where the response is "Current"

ecig_df = us_data[
    us_data["TopicDesc"].eq("E-Cigarette Use (Adults)") & us_data["Response"].eq("Current")
]

ecig_df

Unnamed: 0,YEAR,LocationDesc,TopicDesc,MeasureDesc,Response,Data_Value,Sample_Size,Gender,Race,Age
5463,2018,U.S.,E-Cigarette Use (Adults),User Status,Current,5.6,,Overall,All Races,All Ages
6731,2017,U.S.,E-Cigarette Use (Adults),User Status,Current,4.6,,Overall,All Races,All Ages
15592,2016,U.S.,E-Cigarette Use (Adults),User Status,Current,4.7,,Overall,All Races,All Ages


In [89]:
# Order the e-cigarette rows chronologically (ascending YEAR)

ecig2_df = ecig_df.sort_values("YEAR", ascending=True)

ecig2_df

Unnamed: 0,YEAR,LocationDesc,TopicDesc,MeasureDesc,Response,Data_Value,Sample_Size,Gender,Race,Age
15592,2016,U.S.,E-Cigarette Use (Adults),User Status,Current,4.7,,Overall,All Races,All Ages
6731,2017,U.S.,E-Cigarette Use (Adults),User Status,Current,4.6,,Overall,All Races,All Ages
5463,2018,U.S.,E-Cigarette Use (Adults),User Status,Current,5.6,,Overall,All Races,All Ages


In [109]:
# Plot year and current use percentage

x_axis = ecig2_df["YEAR"]

y_axis = ecig2_df["Data_Value"]

# clear=True wipes figure 3 if it already exists, so re-running this cell
# redraws the chart instead of stacking a second line and label set on it.
fig, ax = plt.subplots(num=3, clear=True)
ax.plot(x_axis, y_axis, marker="*", color="green")

# One tick per year in the data, labelled as a whole number
ax.set_xticks(list(x_axis))
ax.set_xticklabels([str(year) for year in x_axis])

# Label every point with its value, read from the data rather than typed by
# hand; the offset (in points) keeps the text clear of the marker.
for year, value in zip(x_axis, y_axis):
    ax.annotate(f"{value:.1f}", (year, value), xytext=(5, 5), textcoords="offset points")

# Extra horizontal margin so the label of the last year stays inside the axes
ax.margins(x=0.1)

ax.set_xlabel("Year")
ax.set_ylabel("Current E-Cigarette Users (Median %)")
# The filtered data only covers 2016 through 2018, so the title says so
ax.set_title("Current E-Cigarette Users from 2016-2018")

plt.show()

<IPython.core.display.Javascript object>

* Current cigarette use (U.S. median) declined from 21.2% in 2011 to 16.0% in 2019.

* Smokeless tobacco use has not clearly trended in one direction from 2011-2019, and has remained low compared to cigarette use.

* There is not enough data on e-cigarette use to determine a trend: only three years (2016-2018) are available.