In [21]:
# Module 7 Lab - Case Study: Pandas and Data Science in Python
# Steve Baker

# Importing the pandas library
import pandas as pd

# Read the file 'telco_churn.csv' into a DataFrame named df.
df = pd.read_csv(filepath_or_buffer='telco_churn.csv')

# Preview the top of the table twice: 5 rows (the head() default), then 10.
for heading, row_count in (
    ("First 5 rows of the DataFrame:", 5),
    ("\nFirst 10 rows of the DataFrame:", 10),
):
    print(heading)
    print(df.head(row_count))

# Build a small DataFrame from an in-memory mapping of
# column label -> list of values (one list entry per row).
data = dict(
    Name=['John', 'Anna', 'Peter', 'Linda'],
    Age=[28, 24, 35, 32],
    City=['New York', 'Paris', 'Berlin', 'London'],
)
df_dict = pd.DataFrame(data)

# Show the dictionary-based DataFrame (it has fewer than 5 rows, so head()
# displays all of it).
print("\nDataFrame created from a dictionary:")
preview = df_dict.head()
print(preview)

# Inspect the end of the table: 5 rows (the tail() default), then 15.
for heading, row_count in (
    ("\nBottom 5 rows of the DataFrame:", 5),
    ("\nBottom 15 rows of the DataFrame:", 15),
):
    print(heading)
    print(df.tail(row_count))

# Show the column labels, then the dtype of each column.
print("\nColumns in the DataFrame:")
column_labels = df.columns
print(column_labels)

print("\nData types of the columns:")
column_dtypes = df.dtypes
print(column_dtypes)

# describe() without arguments: summary statistics for the numerical columns.
print("\nSummary statistics for numerical columns:")
numeric_summary = df.describe()
print(numeric_summary)

# describe(include='object'): summary restricted to the object-dtype columns.
print("\nSummary statistics for object columns:")
object_summary = df.describe(include='object')
print(object_summary)

# Column selection: a single label yields one column, a list of labels
# yields a sub-table with just those columns.
print("\nState column:")
state_column = df['State']
print(state_column.head())

print("\nInternational Plan column:")
intl_plan_column = df['International plan']
print(intl_plan_column.head())

print("\nMultiple columns (State and International Plan):")
selected_labels = ['State', 'International plan']
print(df[selected_labels].head())

# Distinct values found in a column.
for heading, column_label in (
    ("\nUnique values in the State column:", 'State'),
    ("\nUnique values in the Churn column:", 'Churn'),
):
    print(heading)
    print(df[column_label].unique())

# Row filtering with a boolean mask: keep rows whose plan value equals 'No'.
print("\nRows where International Plan is 'No':")
no_intl_plan = df['International plan'] == 'No'
print(df[no_intl_plan].head())

# Combine two masks with & (element-wise AND) to require both conditions.
print("\nRows where International Plan is 'No' and Churn is 'True':")
churned = df['Churn'].eq(True)
print(df[no_intl_plan & churned].head())

# Positional access with iloc. Positions are zero-based, so the 15th row
# lives at position 14.
fifteenth = 14

print("\n15th row of the DataFrame:")
print(df.iloc[fifteenth])

# A (row, column) pair returns a single cell; column 0 is the first column.
print("\nState value of the 15th row:")
print(df.iloc[fifteenth, 0])

# A negative column position counts from the end: -1 is the last column.
print("\nLast column of the 15th row:")
print(df.iloc[fifteenth, -1])

# Positional slice. The stop value is exclusive, so 22:33 selects
# positions 22 through 32 (11 rows).
print("\nRows 22 to 33 of the DataFrame:")
row_window = slice(22, 33)
print(df.iloc[row_window])

# Promote the 'State' column to the row index, modifying df in place,
# so that rows can be selected by state label.
df.set_index(keys='State', inplace=True)
print("\nDataFrame with 'state' as index:")
indexed_preview = df.head()
print(indexed_preview)

# loc selects by index label: fetch the row(s) labelled 'OH'.
print("\nRows where state is 'OH':")
ohio_rows = df.loc['OH']
print(ohio_rows)

# Count the missing entries per column before cleaning.
print("\nNumber of missing values in each column before dropping:")
missing_before = df.isna().sum()
print(missing_before)

# dropna() with default arguments, applied in place, removes the rows
# that contain a missing value.
df.dropna(inplace=True)

# Count again to confirm the effect of the drop.
print("\nNumber of missing values in each column after dropping:")
missing_after = df.isna().sum()
print(missing_after)

#here

# Remove the 'Area code' column from df in place.
df.drop(columns='Area code', inplace=True)
print("DataFrame after dropping 'area code' column:")
print(df.head())

# Add a calculated column: element-wise sum of two existing columns.
night_minutes = df['Total night minutes']
intl_minutes = df['Total intl minutes']
df['new column'] = night_minutes + intl_minutes
print("\nDataFrame after adding 'new column':")
print(df.head())

# Assigning a scalar overwrites every row of the column with that value.
df['new column'] = 100
print("\nDataFrame after setting 'new column' to 100:")
print(df.head())

# Overwrite a single cell by position: first row, last column.
# 'new column' was appended above, so it currently sits in the last position.
last_column_pos = df.shape[1] - 1
df.iloc[0, last_column_pos] = 10
print("\nDataFrame after setting the first value of 'new column' to 10:")
print(df.head())

# Derive a 0/1 column from 'Churn' by applying a function to each value.
def _churn_to_int(flag):
    """Return 1 when the churn flag is truthy, otherwise 0."""
    if flag:
        return 1
    return 0


df['Churn binary'] = df['Churn'].apply(_churn_to_int)
print("\nDataFrame after applying lambda function to 'churn' column:")
churn_columns = ['Churn', 'Churn binary']
print(df[churn_columns].head())

# 'State' has been the row index since the set_index() call earlier in this
# script. Writing the CSV with index=False, and resetting the index with
# drop=True before the JSON/HTML exports, silently discarded the state of
# every row. Turn the index back into an ordinary 'State' column first so
# that all three exports contain it; this also leaves df with a unique
# default integer index.
df.reset_index(inplace=True)

# Outputting the DataFrame to a CSV file
# index=False omits only the default integer index; 'State' is now a column.
df.to_csv('output.csv', index=False)
print("\nDataFrame has been output to 'output.csv'.")

# Outputting the DataFrame to a JSON file
# NOTE: to_json's default orientation for a DataFrame requires unique index
# labels, which the reset above guarantees.
df.to_json('output.json')
print("DataFrame has been output to 'output.json'.")

# Outputting the DataFrame to an HTML file
df.to_html('output.html')
print("DataFrame has been output to 'output.html'.")

# Deleting the DataFrame
# Removes the name df from the namespace; later use of df raises NameError.
del df
print("DataFrame has been deleted.")

First 5 rows of the DataFrame:
  State  Account length  Area code International plan Voice mail plan  \
0    KS             128        415                 No             Yes   
1    OH             107        415                 No             Yes   
2    NJ             137        415                 No              No   
3    OH              84        408                Yes              No   
4    OK              75        415                Yes              No   

   Number vmail messages  Total day minutes  Total day calls  \
0                     25              265.1            110.0   
1                     26              161.6            123.0   
2                      0              243.4            114.0   
3                      0              299.4             71.0   
4                      0              166.7            113.0   

   Total day charge  Total eve minutes  Total eve calls  Total eve charge  \
0             45.07              197.4             99.0             