In [1]:
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

### Step 1: Read Raw Data and Recode "No phone service" / "No internet service" as "No"

In [2]:
# Load the churn CSV; the relative path is resolved against the current working directory.
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [3]:
# Remove the customerID and TotalCharges columns from the frame.
df = df.drop(columns=['customerID', 'TotalCharges'])

In [4]:
# Each add-on column has a "no service" placeholder value; recode it to a plain 'No'.
no_service_placeholder = {
    'MultipleLines': 'No phone service',
    'OnlineSecurity': 'No internet service',
    'OnlineBackup': 'No internet service',
    'DeviceProtection': 'No internet service',
    'TechSupport': 'No internet service',
    'StreamingTV': 'No internet service',
    'StreamingMovies': 'No internet service',
}
for column, placeholder in no_service_placeholder.items():
    df.loc[df[column] == placeholder, column] = 'No'

In [5]:
# Print the column names, non-null counts and dtypes of the recoded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7043 non-null   object 
 1   SeniorCitizen     7043 non-null   int64  
 2   Partner           7043 non-null   object 
 3   Dependents        7043 non-null   object 
 4   tenure            7043 non-null   int64  
 5   PhoneService      7043 non-null   object 
 6   MultipleLines     7043 non-null   object 
 7   InternetService   7043 non-null   object 
 8   OnlineSecurity    7043 non-null   object 
 9   OnlineBackup      7043 non-null   object 
 10  DeviceProtection  7043 non-null   object 
 11  TechSupport       7043 non-null   object 
 12  StreamingTV       7043 non-null   object 
 13  StreamingMovies   7043 non-null   object 
 14  Contract          7043 non-null   object 
 15  PaperlessBilling  7043 non-null   object 
 16  PaymentMethod     7043 non-null   object 


In [6]:
# Number of rows in the churn frame.
len(df)

7043

### Step 2: Download Tweets with keywords "T-Mobile" and "Verizon"

In [7]:
import tweepy
from textblob import TextBlob
import preprocessor as p
import statistics
from typing import List

#### Set the consumer_key and consumer_secret obtained from a Twitter Developer account

In [8]:
# Read the Twitter API credentials from the environment instead of embedding
# them in the notebook, so they are never saved or shared with the file.
# Export TWITTER_CONSUMER_KEY and TWITTER_CONSUMER_SECRET before starting the
# kernel; a missing variable raises KeyError here rather than failing later
# during authentication.
# NOTE(review): the key pair that used to be hardcoded in this cell has been
# exposed and should be revoked in the Twitter developer console.
consumer_key = os.environ['TWITTER_CONSUMER_KEY']
consumer_secret = os.environ['TWITTER_CONSUMER_SECRET']

In [9]:
# Build the auth handler from the consumer key/secret only (no user access token is supplied).
auth = tweepy.AppAuthHandler(consumer_key,consumer_secret)
# API client; get_tweets issues its search requests through this object.
api = tweepy.API(auth)

#### Create a function to download tweets matching a keyword

In [10]:
def get_tweets(keyword: str, max_tweets: int = 4000) -> List[str]:
    """Download the full text of English tweets matching ``keyword``.

    Uses the module-level ``api`` client, so that cell must have been run first.

    Args:
        keyword: Search query handed to the Twitter search endpoint.
        max_tweets: Upper bound on the number of tweets to fetch. Defaults to
            4000, the limit that was previously hard-coded.

    Returns:
        The ``full_text`` of every tweet, in the order the cursor yields them.
    """
    # tweet_mode='extended' is required for results to carry `full_text`.
    cursor = tweepy.Cursor(api.search, q=keyword, tweet_mode='extended', lang='en')
    return [tweet.full_text for tweet in cursor.items(max_tweets)]

#### Create a function to do initial basic cleaning of the downloaded tweets

In [11]:
def clean_tweets(all_tweets: List[str]) -> List[str]:
    """Run every tweet through ``p.clean`` and return the results.

    Args:
        all_tweets: Raw tweet texts.

    Returns:
        A new list with one cleaned string per input tweet, in the same order.
    """
    return [p.clean(raw_tweet) for raw_tweet in all_tweets]

#### Download up to 4000 tweets with keyword 'T-Mobile' and up to another 4000 tweets with keyword 'Verizon'

In [12]:
# Fetch the 'T-Mobile' tweets first, then pass them through the basic cleaner.
tmobile_raw = get_tweets('T-Mobile')
Tweet1 = clean_tweets(tmobile_raw)

In [13]:
# Display the cleaned T-Mobile tweets for a visual check.
Tweet1

["Making sure you get the most out of T-Mobile Tuesdays is key and we'd love to help! Please send us a DM, we'll meet you there! ^RyanMitchener",
 'The Note was just launched. I\'m so glad that I actually found you advertisement. I had no idea that these phone actually exist. Q" Where can I buy? And can a national carrier like T-MOBILE allow service for it? Or where must I go for service?" I have Samsung Note now.',
 ': I get penalized for being broke with my T-Mobile phone bill. They disconnect you and make you pay',
 'Just changed my Sprint SIM card for a T-mobile one and I have less bars at home. Devil in a new dress',
 ': Oh I hear you! Between my wife and me we are k cant afford a second vehicle. Paying $246 a mon',
 ': I get penalized for being broke with my T-Mobile phone bill. They disconnect you and make you pay',
 "I was with T-Mobile since the Voice Stream days. Never had a problem, loved it. Only reason I don't have it now because I live in New Zealand. You need to roll it 

In [14]:
# Persist the cleaned T-Mobile tweets to disk; np.save appends the '.npy' extension to the name.
np.save('Tweet1',Tweet1)

In [15]:
# Fetch the 'Verizon' tweets first, then pass them through the basic cleaner.
verizon_raw = get_tweets('Verizon')
Tweet2 = clean_tweets(verizon_raw)

In [17]:
# Persist the cleaned Verizon tweets to disk; np.save appends the '.npy' extension to the name.
np.save('Tweet2',Tweet2)

In [19]:
# One-column frame holding the cleaned T-Mobile tweets.
Tweet1_df = pd.DataFrame({'Tweet': Tweet1})

In [20]:
# One-column frame holding the cleaned Verizon tweets.
Tweet2_df = pd.DataFrame({'Tweet': Tweet2})

#### Combine all tweets into a single DataFrame

In [42]:
# Stack the T-Mobile frame on top of the Verizon frame and renumber the rows from 0.
All_Tweet = pd.concat([Tweet1_df, Tweet2_df], ignore_index=True)

### Step 3: Basic Text Cleaning and Get Polarity of Tweets

In [43]:
import re

In [44]:
# Get rid of the starting colons in some of the tweets.
# The pattern is anchored with ^ so that only a colon at the very beginning of
# a tweet is removed; colons inside the text (times, "Q: ...", etc.) are kept.
# The previous unanchored pattern stripped every colon in the tweet.

All_Tweet['Tweet'] = All_Tweet['Tweet'].apply(lambda x: re.sub(r'^:', '', x))

In [45]:
# Polarity helper: how positive or negative a tweet is.

def getPolarity(text):
    """Return the polarity that TextBlob assigns to ``text``.

    The value is read from ``TextBlob(text).sentiment.polarity``.
    """
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity

In [46]:
# Score every tweet and store the result in a new 'Polarity' column.
All_Tweet['Polarity'] = All_Tweet['Tweet'].map(getPolarity)

In [47]:
# Keep only as many tweets as there are rows in the churn frame.
# len(df) replaces the literal 7043 so the two tables stay the same length if
# the churn data changes.
# NOTE(review): presumably the tweets are later paired row-for-row with df - confirm.
All_Tweet = All_Tweet[:len(df)]

In [48]:
# Row count of the tweet frame after truncation.
len(All_Tweet)

7043

In [89]:
# Display the tweet frame with its Tweet and Polarity columns.
All_Tweet

Unnamed: 0,Tweet,Polarity
0,Making sure you get the most out of T-Mobile T...,0.445312
1,The Note was just launched. I'm so glad that I...,0.166667
2,I get penalized for being broke with my T-Mob...,0.000000
3,Just changed my Sprint SIM card for a T-mobile...,-0.015152
4,Oh I hear you! Between my wife and me we are ...,0.000000
...,...,...
7038,"As learning goes online, so do teachers &amp; ...",0.333333
7039,"I just saw the gb data ""reward"" offered in Ver...",0.357143
7040,Marketing | Verizon Announces Resources for Fe...,-0.083333
7041,is there an issue with SMS based two-step veri...,0.000000


In [90]:
# Write the scored tweets next to the notebook instead of to an absolute,
# user-specific Desktop path, so the cell runs on any machine. This matches
# how the input CSV and the .npy files are addressed (relative to the working
# directory). index=False keeps the row index out of the file.
All_Tweet.to_csv('TweetSentiment.csv',index=False)