-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
297 lines (196 loc) · 9.92 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
"""Streamlit entry point for the WhatsApp Chat Analyser.

Sets up the sidebar (title and chat-file uploader); the rest of the script
renders the analysis once a file has been uploaded.
"""
import matplotlib.pyplot as plt  # Matplotlib for plotting
import seaborn as sns  # Seaborn for the heatmap
import streamlit as st  # Streamlit for creating the web app

import functions  # Project module with the analysis helper functions
import preprocessor  # Project module that preprocesses WhatsApp chat data

# NOTE: explanatory text below is written as `#` comments rather than bare
# string literals, because Streamlit's "magic" feature writes any standalone
# literal outside a docstring position straight into the rendered page.

# Title shown at the top of the sidebar.
st.sidebar.title("WhatsApp Chat Analyser")

# File uploader widget in the sidebar; holds the uploaded chat export,
# or None until the user has picked a file.
uploaded_file = st.sidebar.file_uploader("Please choose the WhatsApp file that you want to analyze")
def _show_figure(fig):
    """Render a Matplotlib figure in the app, then close it.

    Args:
        fig: The Matplotlib figure to display.

    Figures created with ``plt.subplots()`` stay registered with pyplot until
    they are closed, so closing after rendering keeps them from piling up.
    """
    st.pyplot(fig)
    plt.close(fig)


# Nothing below runs until a chat file has been uploaded.
if uploaded_file is not None:
    # Read the uploaded file as bytes, decode it as UTF-8 and preprocess it.
    data = uploaded_file.getvalue().decode("utf-8")
    df = preprocessor.preprocess(data)
    # Show the preprocessed data in the main content area.
    st.dataframe(df)

    # Dropdown that lets the user pick a single participant or the whole
    # group ("Overall") for the analysis.
    user_list = df['user'].unique().tolist()
    # 'group_notification' is not a real participant. Remove it only when it
    # is present: list.remove() raises ValueError for a missing value.
    if 'group_notification' in user_list:
        user_list.remove('group_notification')
    user_list.sort()
    user_list.insert(0, "Overall")
    selected_user = st.sidebar.selectbox("Show analysis with respect to", user_list)

    # The analysis is rendered only when the sidebar button is clicked.
    if st.sidebar.button("Show Analysis"):
        # ---- Top statistics ----
        num_messages, words, num_media_messages, num_links = functions.fetch_stats(selected_user, df)
        st.title("Top Statistics")
        headline_stats = (
            ("Total Messages", num_messages),
            ("Total Words", words),
            ("Media Shared", num_media_messages),
            ("Links Shared", num_links),
        )
        # One column per statistic: header on top, value below it.
        for column, (label, value) in zip(st.columns(4), headline_stats):
            with column:
                st.header(label)
                st.title(value)

        # ---- Monthly timeline ----
        st.title("Monthly Timeline")
        timeline = functions.monthly_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(timeline['time'], timeline['message'], color='green')
        # Vertical tick labels so that they do not overlap.
        plt.xticks(rotation='vertical')
        _show_figure(fig)

        # ---- Daily timeline ----
        st.title("Daily Timeline")
        daily_timeline = functions.daily_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(daily_timeline['only_date'], daily_timeline['message'], color='black')
        plt.xticks(rotation='vertical')
        _show_figure(fig)

        # ---- Activity maps: busiest day and busiest month side by side ----
        st.title('Activity Map')
        col1, col2 = st.columns(2)

        with col1:
            st.header("Most busy day")
            busy_day = functions.week_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_day.index, busy_day.values, color='purple')
            plt.xticks(rotation='vertical')
            _show_figure(fig)

        with col2:
            st.header("Most busy month")
            busy_month = functions.month_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_month.index, busy_month.values, color='orange')
            plt.xticks(rotation='vertical')
            _show_figure(fig)

        # ---- Weekly activity heatmap ----
        # The title is emitted once (it used to be written twice in a row).
        st.title("Weekly Activity Map")
        user_heatmap = functions.activity_heatmap(selected_user, df)
        fig, ax = plt.subplots()
        # Draw on the axes created above instead of relying on pyplot's
        # implicit "current axes".
        sns.heatmap(user_heatmap, ax=ax)
        _show_figure(fig)

        # ---- Busiest users (only meaningful for the whole group) ----
        if selected_user == 'Overall':
            st.title('Most Busy Users')
            x, new_df = functions.most_busy_users(df)
            fig, ax = plt.subplots()
            col1, col2 = st.columns(2)

            # Bar chart on the left, the accompanying table on the right.
            with col1:
                ax.bar(x.index, x.values, color='red')
                plt.xticks(rotation='vertical')
                _show_figure(fig)

            with col2:
                st.dataframe(new_df)

        # ---- Word cloud ----
        st.title("Wordcloud")
        df_wc = functions.create_wordcloud(selected_user, df)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        _show_figure(fig)

        # ---- Most common words ----
        most_common_df = functions.most_common_words(selected_user, df)
        fig, ax = plt.subplots()
        # Horizontal bars: column 0 supplies the bar labels, column 1 the lengths.
        ax.barh(most_common_df[0], most_common_df[1])
        plt.xticks(rotation='vertical')
        st.title('Most common words')
        _show_figure(fig)

        # ---- Emoji analysis ----
        emoji_df = functions.emoji_helper(selected_user, df)
        st.title("Emoji Analysis")
        col1, col2 = st.columns(2)

        # Full emoji table on the left.
        with col1:
            st.dataframe(emoji_df)

        # Pie chart of the first five rows on the right: column 0 supplies
        # the labels, column 1 the wedge sizes.
        with col2:
            # NOTE(review): presumably emoji_helper returns an empty frame
            # when no emoji were used - confirm. Skip the chart in that case,
            # since there is nothing to plot and the column lookups may fail.
            if len(emoji_df) > 0:
                fig, ax = plt.subplots()
                ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f")
                _show_figure(fig)