# Importing Modules

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import matplotlib.pyplot as plt
%matplotlib inline
import warnings 
warnings.filterwarnings(action="ignore")
import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Read the tweets CSV into a DataFrame and preview its first ten rows.
csv_path = "/kaggle/input/indianeedsoxygen-tweets/IndiaWantsOxygen.csv"
df = pd.read_csv(csv_path)
df.head(n=10)

In [None]:
# Dimensions of the dataset as a (number of rows, number of columns) tuple
df.shape

In [None]:
# Concise summary of the frame: column names, non-null counts, dtypes and memory usage
df.info()

In [None]:
# Count the missing values in every column
df.isna().sum()

In [None]:
# Keep only the rows whose user_name is present (drops rows with a missing user_name)
df = df.loc[df["user_name"].notna()]

In [None]:
# Re-count missing values per column. Rows with a null user_name were filtered
# out of df above, so the user_name count should now be 0; other columns may
# still contain nulls because they were not filtered.
df.isna().sum()

In [None]:
# How many distinct user names appear in the dataset
df["user_name"].nunique()

<div class="alert alert-box alert-info">
There are almost around 28000 Tweets tweeted by More than 20000 Users.
</div>

In [None]:
# The 20 most frequent values of user_location, most common first
df["user_location"].value_counts().iloc[:20]

# Visualization

## WordCloud

In [None]:
from wordcloud import WordCloud, STOPWORDS

# Build one corpus string from all tweets.
# The tweets are joined with a space: joining with "" would glue the last word
# of each tweet to the first word of the next one and produce bogus tokens.
# Missing values are dropped and every entry is cast to str so join() cannot
# fail on a non-string (e.g. NaN) value.
words = " ".join(df["text"].dropna().astype(str))

# STOPWORDS is the library's built-in stop-word set (also the default when
# `stopwords` is None); it is passed explicitly to make the filtering visible.
wc = WordCloud(
    width=2000,
    height=800,
    background_color="white",
    stopwords=STOPWORDS,
    min_font_size=10,
).generate(words)

plt.figure(figsize=(20,20))
plt.imshow(wc)
plt.axis("off")
plt.show()

In [None]:
# Word cloud of the hashtags column.
# Entries are joined with a space so adjacent entries can never run together,
# and missing values are dropped / cast to str so join() cannot fail on NaN.
words2 = " ".join(df["hashtags"].dropna().astype(str))

# WordCloud is imported in the tweet-text word cloud cell above.
wc2 = WordCloud(
    width=2000,
    height=800,
    background_color="white",
    min_font_size=10,
).generate(words2)

plt.figure(figsize=(20,20))
plt.imshow(wc2)
plt.axis("off")
plt.show()

In [None]:
# Reduce every timestamp in the date column to its calendar-date part, stored as a string.
parsed_dates = pd.to_datetime(df["date"])
# The text form of a timestamp is "<date> <time>", so the piece before the
# first space is the date.
df["date"] = parsed_dates.astype(str).str.split(" ").str[0]
df["date"]

## Line Plot

In [None]:
# Number of tweets per date (date holds one date string per tweet).
tweets_per_day = df.groupby("date").date.count()

plt.figure(figsize=(16,9))
sns.set_style("whitegrid")
# x and y must be passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally and raises a TypeError.
sns.lineplot(x=tweets_per_day.index, y=tweets_per_day.values)
plt.title("Frequency of Tweets ", fontsize=14)
plt.ylabel("Number of Tweets", fontsize=14)
plt.xlabel("Dates", fontsize=14)
plt.show()

In [None]:
# Share of tweets coming from verified vs. non-verified accounts.
verified_counts = df["user_verified"].value_counts()

# value_counts() orders its result by frequency, not by value, so a fixed
# ("Verified", "Non Verified") tuple can end up on the wrong slices.
# Each label is derived from the index value it belongs to instead; this also
# works when only one of the two categories is present.
labels = [
    "Verified" if str(flag).strip().lower() == "true" else "Non Verified"
    for flag in verified_counts.index
]

plt.figure(figsize=(15,7))
plt.pie(verified_counts, labels=labels, autopct="%1.1f%%")
plt.show()

In [None]:
# Distinct values that occur in the is_retweet column
df["is_retweet"].unique()

<div class=" alert alert-box alert-info">
No Tweet has been retweeted.
</div>

In [None]:
# Preview the first rows of the current frame (head() defaults to five rows)
df.head()

In [None]:
# Bar chart of the ten sources with the most tweets.
top_sources = df["source"].value_counts().head(10)

fig = px.bar(top_sources, x=top_sources.index, y=top_sources.values)
fig.update_layout(title="Tweets made by each Source")
fig.show()

In [None]:
# Followers and friends for the users of the first 30 rows.
# The rows are sorted as a whole by user_name. Sorting only the name column
# (as a separate Series) would reorder the x values while leaving y in the
# original row order, pairing each name with another user's counts.
first_users = df.head(30).sort_values("user_name")

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=first_users["user_name"],
        y=first_users["user_followers"],
        name="User Followers",
    )
)
fig.add_trace(
    go.Scatter(
        x=first_users["user_name"],
        y=first_users["user_friends"],
        name="User Friends",
    )
)

fig.update_layout(title="User Friends and User Followers")
fig.show()

In [None]:
# Favourites count for the users of the first 30 rows.
# Whole rows are sorted by user_name so each name stays paired with its own
# user_favourites value; sorting the name column alone would misalign x and y.
first_users_fav = df.head(30).sort_values("user_name")

fig1 = go.Figure()

fig1.add_trace(
    go.Scatter(
        x=first_users_fav["user_name"],
        y=first_users_fav["user_favourites"],
        name="User Favourites",
    )
)

fig1.update_layout(title="User Favourites")
fig1.show()

In [None]:
# The 30 most frequent values of user_location, most common first
df.user_location.value_counts().iloc[:30]

In [None]:
# Horizontal bar chart of the 30 most frequent user_location values.
location = df["user_location"].value_counts().head(30)

plt.figure(figsize=(10,8))
# Counts go on the x-axis and location names on the y-axis.
sns.barplot(y=location.index, x=location.values)
plt.title("Tweets from Different Locations", fontsize=14)
# Axis labels match the data on each axis (the original had them swapped).
plt.xlabel("Tweets", fontsize=14)
plt.ylabel("Location", fontsize=14)
plt.xticks(rotation=45)
plt.show()

<div class="alert alert-box alert-warning">
Please UPVOTE if You find this notebook insightful! And Feel free to ask any doubts regarding the code.
    
Thanks in advance!
</div>