# Libraries and Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from scipy import stats

In [2]:
# Load the raw medical dataset from CSV.
# index_col=0 uses the file's first column as the DataFrame index instead of
# loading it as a data column.
df = pd.read_csv('medical_raw_data.csv', index_col=0)
# Preview the first five rows to confirm the file parsed as expected
df.head()

Unnamed: 0,CaseOrder,Customer_id,Interaction,UID,City,State,County,Zip,Lat,Lng,...,TotalCharge,Additional_charges,Item1,Item2,Item3,Item4,Item5,Item6,Item7,Item8
1,1,C412403,8cd49b13-f45a-4b47-a2bd-173ffa932c2f,3a83ddb66e2ae73798bdf1d705dc0932,Eva,AL,Morgan,35621,34.3496,-86.72508,...,3191.048774,17939.40342,3,3,2,2,4,3,3,4
2,2,Z919181,d2450b70-0337-4406-bdbb-bc1037f1734c,176354c5eef714957d486009feabf195,Marianna,FL,Jackson,32446,30.84513,-85.22907,...,4214.905346,17612.99812,3,4,3,4,4,4,3,3
3,3,F995323,a2057123-abf5-4a2c-abad-8ffe33512562,e19a0fa00aeda885b8a436757e889bc9,Sioux Falls,SD,Minnehaha,57110,43.54321,-96.63772,...,2177.586768,17505.19246,2,4,4,4,3,4,3,3
4,4,A879973,1dec528d-eb34-4079-adce-0d7a40e82205,cd17d7b6d152cb6f23957346d11c3f07,New Richland,MN,Waseca,56072,43.89744,-93.51479,...,2465.118965,12993.43735,3,5,5,3,4,5,5,5
5,5,C544523,5885f56b-d6da-43a3-8760-83583af94266,d2f0425877b10ed6bb381f3e2579424a,West Point,VA,King William,23181,37.59894,-76.88958,...,1885.655137,3716.525786,2,1,3,3,5,3,4,3


# Initial Exploration of full DataFrame

In [3]:
# Verify the DataFrame has the expected 10,000 rows and 52 columns.
# The assert fails loudly if the source file ever changes shape, instead of
# relying on the reader to eyeball the displayed tuple.
EXPECTED_SHAPE = (10000, 52)
assert df.shape == EXPECTED_SHAPE, f"Unexpected shape: {df.shape}"
df.shape

(10000, 52)

In [4]:
# Summary statistics, transposed so each column becomes a row (easier to scan).
# describe() summarizes only numeric columns by default, so the rows listed
# here also show which columns pandas parsed as numeric.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CaseOrder,10000.0,5000.5,2886.89568,1.0,2500.75,5000.5,7500.25,10000.0
Zip,10000.0,50159.3239,27469.588208,610.0,27592.0,50207.0,72411.75,99929.0
Lat,10000.0,38.751099,5.403085,17.96719,35.25512,39.419355,42.044175,70.56099
Lng,10000.0,-91.24308,15.205998,-174.20969,-97.352982,-88.39723,-80.43805,-65.29017
Population,10000.0,9965.2538,14824.758614,0.0,694.75,2769.0,13945.0,122814.0
Children,7412.0,2.098219,2.155427,0.0,0.0,1.0,3.0,10.0
Age,7586.0,53.295676,20.659182,18.0,35.0,53.0,71.0,89.0
Income,7536.0,40484.438268,28664.86105,154.08,19450.7925,33942.28,54075.235,207249.13
VitD_levels,10000.0,19.412675,6.723277,9.519012,16.513171,18.08056,19.78974,53.019124
Doc_visits,10000.0,5.0122,1.045734,1.0,4.0,5.0,6.0,9.0


In [5]:
# List every column with its dtype and non-null count. Any column whose
# non-null count is below the total number of entries has missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10000 entries, 1 to 10000
Data columns (total 52 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   CaseOrder           10000 non-null  int64  
 1   Customer_id         10000 non-null  object 
 2   Interaction         10000 non-null  object 
 3   UID                 10000 non-null  object 
 4   City                10000 non-null  object 
 5   State               10000 non-null  object 
 6   County              10000 non-null  object 
 7   Zip                 10000 non-null  int64  
 8   Lat                 10000 non-null  float64
 9   Lng                 10000 non-null  float64
 10  Population          10000 non-null  int64  
 11  Area                10000 non-null  object 
 12  Timezone            10000 non-null  object 
 13  Job                 10000 non-null  object 
 14  Children            7412 non-null   float64
 15  Age                 7586 non-null   float64
 16  Education

In [7]:
# df.duplicated() returns a boolean Series, one entry per row, that is True
# where the row is an exact repeat of an earlier row
df.duplicated()

1        False
2        False
3        False
4        False
5        False
         ...  
9996     False
9997     False
9998     False
9999     False
10000    False
Length: 10000, dtype: bool

In [8]:
# Tally the True/False flags from df.duplicated(); a True entry in the result
# would give the number of duplicated rows
df.duplicated().value_counts()

False    10000
Name: count, dtype: int64

In [9]:
# Drop any duplicate rows (experimental check only).
# drop_duplicates() returns a new DataFrame, so the result is stored in
# df_unique and the primary DataFrame df is left unchanged.
df_unique = df.drop_duplicates()
# The row count should still match df if there were no duplicates
df_unique.shape

(10000, 52)

In [10]:
# Confirm that no duplicate rows remain in df_unique (expect only False)
df_unique.duplicated().value_counts()

False    10000
Name: count, dtype: int64

### Initial Findings

No duplicate rows were found: all 10,000 rows are unique, so every row is retained.

# C4: Data Cleaning Code (Detection)

The code in this section only identifies data-quality issues; no cleaning or wrangling steps are performed at this stage.

In [None]:
# Build a copy of the data keeping only rows with no missing values in any
# column; df itself is not modified.
no_missing_data = df.dropna()
# info() reports how many complete rows remain (2,313 of the original 10,000)
no_missing_data.info()

In [None]:
# Preview 10 of the complete rows. random_state fixes the draw so the same
# rows are shown every time the notebook is re-run.
no_missing_data.sample(10, random_state=42)

### Initial Findings

If we were to drop all rows with _any_ NA values, only 2,313 complete rows would remain out of the original 10,000. This reduction in data is too severe to consider as a cleaning method for this dataset.

## Check for number of unique values across all 52 variables

In [11]:
# Run .nunique() against the DataFrame to return the number of distinct
# non-null values found in each column (McCoy 2024)
df.nunique()

CaseOrder             10000
Customer_id           10000
Interaction           10000
UID                   10000
City                   6072
State                    52
County                 1607
Zip                    8612
Lat                    8588
Lng                    8601
Population             5951
Area                      3
Timezone                 26
Job                     639
Children                 11
Age                      72
Education                12
Employment                5
Income                 7531
Marital                   5
Gender                    3
ReAdmis                   2
VitD_levels           10000
Doc_visits                9
Full_meals_eaten          8
VitD_supp                 6
Soft_drink                2
Initial_admin             3
HighBlood                 2
Stroke                    2
Complication_risk         3
Overweight                2
Arthritis                 2
Diabetes                  2
Hyperlipidemia            2
BackPain            

Any column with 52 or fewer unique values in this dataset is likely a categorical variable. The exceptions are discrete numerical variables with only a few distinct values, such as 'Children' and 'Doc_visits', which are numeric rather than categorical.

In [12]:
# Select the four columns that uniquely identify each record (each has 10,000
# distinct values). This is a column subset of df, not a boolean mask.
unique_identifiers = df[['CaseOrder', 'Customer_id', 'Interaction', 'UID']]
unique_identifiers

Unnamed: 0,CaseOrder,Customer_id,Interaction,UID
1,1,C412403,8cd49b13-f45a-4b47-a2bd-173ffa932c2f,3a83ddb66e2ae73798bdf1d705dc0932
2,2,Z919181,d2450b70-0337-4406-bdbb-bc1037f1734c,176354c5eef714957d486009feabf195
3,3,F995323,a2057123-abf5-4a2c-abad-8ffe33512562,e19a0fa00aeda885b8a436757e889bc9
4,4,A879973,1dec528d-eb34-4079-adce-0d7a40e82205,cd17d7b6d152cb6f23957346d11c3f07
5,5,C544523,5885f56b-d6da-43a3-8760-83583af94266,d2f0425877b10ed6bb381f3e2579424a
...,...,...,...,...
9996,9996,B863060,a25b594d-0328-486f-a9b9-0567eb0f9723,39184dc28cc038871912ccc4500049e5
9997,9997,P712040,70711574-f7b1-4a17-b15f-48c54564b70f,3cd124ccd43147404292e883bf9ec55c
9998,9998,R778890,1d79569d-8e0f-4180-a207-d67ee4527d26,41b770aeee97a5b9e7f69c906a8119d7
9999,9999,E344109,f5a68e69-2a60-409b-a92f-ac0847b27db0,2bb491ef5b1beb1fed758cc6885c167a


## Explore each of the remaining 48 variables (unique patient identifiers excluded)

### City

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['City'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['City'].value_counts()

### State

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['State'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['State'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['State'].describe()

### County

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['County'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['County'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['County'].describe()

### Zip

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Zip'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value
df['Zip'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Zip'].describe()

### Lat

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Lat'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Lat'].value_counts()

### Lng

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Lng'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Lng'].value_counts()

### Population

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Population'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Population'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Population'].describe()

In [None]:
# Histogram of Population for a first look at the spread of the data.
# Uses an explicit figure/axes and labels the plot so it can be read on its own.
fig, ax = plt.subplots()
ax.hist(df['Population'])
ax.set(title='Distribution of Population', xlabel='Population', ylabel='Count');

In [None]:
# Boxplot of Population to visualize the spread of the data and any outliers
sns.boxplot(data=df, x='Population')

### Area

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Area'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Area'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Area'].describe()

### Timezone

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Timezone'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Timezone'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Timezone'].describe()

### Job

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Job'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Job'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Job'].describe()

### Children

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Children'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Children'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Children'].describe()

In [None]:
# Children is a discrete count (0-10, 11 distinct values) with missing entries.
# Use one unit-width bin centered on each integer; bins=10 would merge the
# counts for 9 and 10 into a single bar.
children = df['Children'].dropna()
bin_edges = np.arange(children.min() - 0.5, children.max() + 1.5)
plt.hist(children, bins=bin_edges);

### Age

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Age'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Age'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Age'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
plt.hist(df['Age']);

### Education

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Education'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Education'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Education'].describe()

### Employment

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Employment'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Employment'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Employment'].describe()

In [None]:
# Employment is a categorical (non-numeric) column, so a histogram's numeric
# bins do not line up with its categories. Plot the frequency of each category
# as a bar chart instead.
ax = df['Employment'].value_counts().plot.bar()
ax.set(title='Employment category counts', xlabel='Employment', ylabel='Count');

### Income

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Income'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Income'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Income'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
plt.hist(df['Income']);

### Marital

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Marital'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Marital'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Marital'].describe()

In [None]:
# Marital is a categorical (non-numeric) column, so a histogram's numeric
# bins do not line up with its categories. Plot the frequency of each category
# as a bar chart instead.
ax = df['Marital'].value_counts().plot.bar()
ax.set(title='Marital category counts', xlabel='Marital', ylabel='Count');

### Gender

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Gender'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Gender'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Gender'].describe()

In [None]:
# Gender is a categorical (non-numeric) column, so a histogram's numeric
# bins do not line up with its categories. Plot the frequency of each category
# as a bar chart instead.
ax = df['Gender'].value_counts().plot.bar()
ax.set(title='Gender category counts', xlabel='Gender', ylabel='Count');

### ReAdmis

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['ReAdmis'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['ReAdmis'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['ReAdmis'].describe()

### VitD_levels

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['VitD_levels'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['VitD_levels'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['VitD_levels'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['VitD_levels'].hist()

### Doc_visits

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Doc_visits'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Doc_visits'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Doc_visits'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Doc_visits'].hist()

### Full_meals_eaten

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Full_meals_eaten'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Full_meals_eaten'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Full_meals_eaten'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Full_meals_eaten'].hist()

### VitD_supp

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['VitD_supp'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['VitD_supp'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['VitD_supp'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['VitD_supp'].hist()

### Soft_drink

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Soft_drink'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Soft_drink'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Soft_drink'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Soft_drink'].hist()

### Initial_admin

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Initial_admin'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Initial_admin'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Initial_admin'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Initial_admin'].hist()

### HighBlood

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['HighBlood'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['HighBlood'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['HighBlood'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['HighBlood'].hist()

### Stroke

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Stroke'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Stroke'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Stroke'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Stroke'].hist()

### Complication_risk

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Complication_risk'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Complication_risk'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Complication_risk'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Complication_risk'].hist()

### Overweight

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Overweight'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Overweight'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Overweight'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Overweight'].hist()

### Arthritis

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Arthritis'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Arthritis'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Arthritis'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Arthritis'].hist()

### Diabetes

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Diabetes'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Diabetes'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Diabetes'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Diabetes'].hist()

### Hyperlipidemia

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Hyperlipidemia'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Hyperlipidemia'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Hyperlipidemia'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Hyperlipidemia'].hist()

### BackPain

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['BackPain'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['BackPain'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['BackPain'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['BackPain'].hist()

### Anxiety

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Anxiety'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Anxiety'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Anxiety'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Anxiety'].hist();

### Allergic_rhinitis

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Allergic_rhinitis'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Allergic_rhinitis'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Allergic_rhinitis'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Allergic_rhinitis'].hist();

### Reflux_esophagitis

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Reflux_esophagitis'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Reflux_esophagitis'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Reflux_esophagitis'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Reflux_esophagitis'].hist();

### Asthma

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Asthma'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Asthma'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Asthma'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Asthma'].hist()

### Services

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Services'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Services'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Services'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Services'].hist()

### Initial_days

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Initial_days'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Initial_days'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Initial_days'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Initial_days'].hist();

### TotalCharge

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['TotalCharge'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['TotalCharge'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['TotalCharge'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['TotalCharge'].hist()

### Additional_charges

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Additional_charges'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Additional_charges'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Additional_charges'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Additional_charges'].hist()

### Item1 (Survey Question)

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Item1'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Item1'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Item1'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Item1'].hist(bins=8)

### Item2 (Survey Question)

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Item2'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Item2'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Item2'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Item2'].hist(bins=8)

### Item3 (Survey Question)

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Item3'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Item3'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Item3'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Item3'].hist(bins=8)

### Item4 (Survey Question)

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Item4'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Item4'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Item4'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Item4'].hist(bins=8);

### Item5 (Survey Question)

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Item5'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Item5'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Item5'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Item5'].hist(bins=8);

### Item6 (Survey Question)

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Item6'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Item6'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Item6'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Item6'].hist(bins=8)

### Item7 (Survey Question)

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Item7'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Item7'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Item7'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Item7'].hist(bins=8)

### Item8 (Survey Question)

In [None]:
# Run .nunique() against the column to return the number of unique variables
df['Item8'].nunique()

In [None]:
# Run .value_counts() against the column to get the frequency of each distinct value in descending order
df['Item8'].value_counts()

In [None]:
# Run .describe() against the column to get summary statistics on numeric and object types
df['Item8'].describe()

In [None]:
# Create histogram to view a preliminary spread of the data and distribution
df['Item8'].hist(bins=8)