## Acquire process:

In [2]:
import pandas as pd
import numpy as np
import os


# acquire
from env import host, user, password
from pydataset import data

In [3]:
# Create helper function to get the necessary connection url.

def get_connection(db_name):
    '''
    Build the SQLAlchemy connection url used to access the Codeup db.

    The host, user and password are read from the local env file each
    time the function is called.

    Parameters
    ----------
    db_name : str
        Name of the database to connect to (e.g. 'telco_churn').

    Returns
    -------
    str
        A url of the form 'mysql+pymysql://user:password@host/db_name'.

    Notes
    -----
    The user and password are percent-encoded so that reserved characters
    such as '@', '/' or ':' cannot corrupt the url. Credentials without
    reserved characters produce exactly the same url as before.
    NOTE(review): this assumes env stores the raw (not already
    percent-encoded) credentials -- confirm.
    '''
    from urllib.parse import quote_plus

    from env import host, user, password

    # str() keeps non-string credentials (e.g. a numeric password) working,
    # as they did with plain f-string interpolation.
    safe_user = quote_plus(str(user))
    safe_password = quote_plus(str(password))
    return f'mysql+pymysql://{safe_user}:{safe_password}@{host}/{db_name}'


In [4]:
def get_telco_churn_data():
    '''
    Read the telco churn customer data from the Codeup db.

    Runs a single query against the 'telco_churn' database that joins
    customers to contract_types, payment_types and internet_service_types,
    and returns the result as a pandas DataFrame with all columns from
    all four tables (the join keys therefore appear more than once).
    '''
    connection_url = get_connection('telco_churn')

    query = '''
    SELECT *
    FROM customers
    JOIN contract_types ON customers.contract_type_id = contract_types.contract_type_id
    JOIN payment_types ON customers.payment_type_id = payment_types.payment_type_id
    JOIN internet_service_types ON customers.internet_service_type_id = internet_service_types.internet_service_type_id
    '''

    telco_df = pd.read_sql(query, connection_url)
    return telco_df

_________

## Explore the data

In [6]:
# Load the joined telco churn data and keep it under the name `telco`.
telco = get_telco_churn_data()

In [7]:
# Preview the first rows of the data to see the columns and sample values.
telco.head()

Unnamed: 0,customer_id,gender,senior_citizen,partner,dependents,tenure,phone_service,multiple_lines,internet_service_type_id,online_security,...,payment_type_id,monthly_charges,total_charges,churn,contract_type_id,contract_type,payment_type_id.1,payment_type,internet_service_type_id.1,internet_service_type
0,0016-QLJIS,Female,0,Yes,Yes,65,Yes,Yes,1,Yes,...,2,90.45,5957.9,No,3,Two year,2,Mailed check,1,DSL
1,0017-DINOC,Male,0,No,No,54,No,No phone service,1,Yes,...,4,45.2,2460.55,No,3,Two year,4,Credit card (automatic),1,DSL
2,0019-GFNTW,Female,0,No,No,56,No,No phone service,1,Yes,...,3,45.05,2560.1,No,3,Two year,3,Bank transfer (automatic),1,DSL
3,0056-EPFBG,Male,0,Yes,Yes,20,No,No phone service,1,Yes,...,4,39.4,825.4,No,3,Two year,4,Credit card (automatic),1,DSL
4,0078-XZMHT,Male,0,Yes,No,72,Yes,Yes,1,No,...,3,85.15,6316.2,No,3,Two year,3,Bank transfer (automatic),1,DSL


In [9]:
# The non-null counts show no nulls in the data.
# Columns are int, float (e.g. monthly_charges) and object dtypes.
# NOTE(review): object columns could still hold blank strings that
# .info() does not count as null -- confirm before modeling.
telco.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 27 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   customer_id               7043 non-null   object 
 1   gender                    7043 non-null   object 
 2   senior_citizen            7043 non-null   int64  
 3   partner                   7043 non-null   object 
 4   dependents                7043 non-null   object 
 5   tenure                    7043 non-null   int64  
 6   phone_service             7043 non-null   object 
 7   multiple_lines            7043 non-null   object 
 8   internet_service_type_id  7043 non-null   int64  
 9   online_security           7043 non-null   object 
 10  online_backup             7043 non-null   object 
 11  device_protection         7043 non-null   object 
 12  tech_support              7043 non-null   object 
 13  streaming_tv              7043 non-null   object 
 14  streamin

_______

### get curious about the data

In [16]:
# Churn is what we are trying to explain, so start with how it is distributed.
telco.churn.value_counts()

# This shows 26.54% of customers churned (1869 out of 7043).

No     5174
Yes    1869
Name: churn, dtype: int64

In [15]:
# Find out how many customers are senior citizens and how many are not.
telco.senior_citizen.value_counts()

# This shows that 16.21% of all customers are senior citizens (1142 out of 7043).

0    5901
1    1142
Name: senior_citizen, dtype: int64

In [12]:
# Mean, max and min of tenure, displayed together as a tuple.
telco.tenure.mean(), telco.tenure.max(), telco.tenure.min()

(32.37114865824223, 72, 0)

In [13]:
# Mean, max and min of monthly charges, displayed together as a tuple.
telco.monthly_charges.mean(), telco.monthly_charges.max(), telco.monthly_charges.min()

(64.76169246059882, 118.75, 18.25)

In [30]:
# Group by churn and senior_citizen and count the non-null values of every
# remaining column for each (churn, senior_citizen) pair.
# NOTE(review): with no nulls present, every column carries the same group
# size, so the wide table is redundant; .size() would give one number per group.
senior_churn= telco.groupby(['churn','senior_citizen']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,customer_id,gender,partner,dependents,tenure,phone_service,multiple_lines,internet_service_type_id,online_security,online_backup,...,paperless_billing,payment_type_id,monthly_charges,total_charges,contract_type_id,contract_type,payment_type_id,payment_type,internet_service_type_id,internet_service_type
churn,senior_citizen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
No,0,4508,4508,4508,4508,4508,4508,4508,4508,4508,4508,...,4508,4508,4508,4508,4508,4508,4508,4508,4508,4508
No,1,666,666,666,666,666,666,666,666,666,666,...,666,666,666,666,666,666,666,666,666,666
Yes,0,1393,1393,1393,1393,1393,1393,1393,1393,1393,1393,...,1393,1393,1393,1393,1393,1393,1393,1393,1393,1393
Yes,1,476,476,476,476,476,476,476,476,476,476,...,476,476,476,476,476,476,476,476,476,476


In [35]:
# Drop the customer_id count column before displaying the grouped counts.
# The drop is performed here, rather than left as a commented-out line that
# was run once, so the table shown is reproducible on a fresh kernel.
# errors='ignore' makes the cell safe to re-run after the column is gone.
senior_churn = senior_churn.drop(columns=['customer_id'], errors='ignore')
senior_churn.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,gender,partner,dependents,tenure,phone_service,multiple_lines,internet_service_type_id,online_security,online_backup,device_protection,...,paperless_billing,payment_type_id,monthly_charges,total_charges,contract_type_id,contract_type,payment_type_id,payment_type,internet_service_type_id,internet_service_type
churn,senior_citizen,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
No,0,4508,4508,4508,4508,4508,4508,4508,4508,4508,4508,...,4508,4508,4508,4508,4508,4508,4508,4508,4508,4508
No,1,666,666,666,666,666,666,666,666,666,666,...,666,666,666,666,666,666,666,666,666,666
Yes,0,1393,1393,1393,1393,1393,1393,1393,1393,1393,1393,...,1393,1393,1393,1393,1393,1393,1393,1393,1393,1393
Yes,1,476,476,476,476,476,476,476,476,476,476,...,476,476,476,476,476,476,476,476,476,476


_________