In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import wget
import os
import sys

%matplotlib inline

In [None]:
def bar_progress(current, total, width=80):
    """Write a single-line download progress message to stdout.

    Intended to be passed as the ``bar`` callback of ``wget.download``.

    Args:
        current: Number of bytes downloaded so far.
        total: Expected total number of bytes. A value of 0 or less is
            treated as "size unknown" (presumably what the downloader
            reports when the server sends no Content-Length - confirm
            against the wget package).
        width: Accepted only to match the callback signature; unused.

    Returns:
        None. The message is written directly to ``sys.stdout``.
    """
    if total is None or total <= 0:
        # No meaningful percentage can be computed (and total == 0 would
        # raise ZeroDivisionError), so only report the byte count.
        progress_message = f"Downloading: [{current} / unknown] bytes"
    else:
        # Fixed one-decimal percentage: an unformatted float can get shorter
        # between updates and leave stale characters after the "\r" redraw.
        percent = current / total * 100
        progress_message = f"Downloading: {percent:.1f}% [{current} / {total}] bytes"
    # Don't use print() as it will print in new line every time.
    sys.stdout.write("\r" + progress_message)
    sys.stdout.flush()

# Direct-download link for the dataset CSV.
url = "https://drive.google.com/u/0/uc?id=1cnE-XBZlDGAIZtGbvA5ra07KyRRacWDU&export=download&confirm=t"
cur_dir = os.getcwd()
# Build the path with os.path.join rather than a hard-coded "\" separator:
# the backslash form only names a file inside cur_dir on Windows, and "\d"
# is an invalid escape sequence that newer Python versions warn about.
file_path = os.path.join(cur_dir, "depression.csv")
# Only download when the file is not already present in the working directory.
if not os.path.exists(file_path):
    wget.download(url, file_path, bar=bar_progress)

In [None]:
# Read the CSV from the exact path the download cell wrote to, instead of a
# separately spelled relative name that can disagree with it.
# NOTE(review): with the default header handling, the first line of the file
# is consumed as the header row and then overwritten by the assignment below.
# If the file has no header line, pass header=None so that row is kept.
data = pd.read_csv(file_path, encoding='latin-1')
data.columns = ["target", "id", "date", "flag", "user", "text"]
data.head(5)

In [None]:
# Distinct label values that occur in the "target" column.
pd.unique(data["target"])

In [None]:
# Distinct values of characters 11-12 of the date string (the slice that the
# hourly tally compares against '00'..'23').
data["date"].str.slice(11, 13).unique()

In [None]:
# Label counts for rows whose date string has '06' at characters 8-9
# (presumably the day of the month - confirm against the date format).
matches_06 = data["date"].str.slice(8, 10) == "06"
data.loc[matches_06, "target"].value_counts()

In [None]:
# Zero-padded hour strings '00'..'23', generated instead of typed out by hand.
hours = [f"{h:02d}" for h in range(24)]
pos_tweets = []
neg_tweets = []

# This tally expects the original label values (4 = positive, 0 = negative).
# If no row carries label 4 the column has most likely already been rescaled
# by a later cell, so fail loudly instead of reporting zeros.
if not (data['target'] == 4).any():
    raise ValueError(
        "no rows with target == 4; run this cell before the target column is rescaled"
    )

# Extract the hour slice once instead of once per loop iteration.
tweet_hour = data['date'].str[11:13]

for hour in hours:
    hour_data = data.loc[tweet_hour == hour, 'target'].value_counts()
    # .get(..., 0): an hour with no tweets of one label counts as 0 rather
    # than raising KeyError. int() keeps the lists as plain Python ints.
    pos_tweets.append(int(hour_data.get(4, 0)))
    neg_tweets.append(int(hour_data.get(0, 0)))

print(pos_tweets)
print(neg_tweets)

In [None]:
# Box plot of the label column as it currently stands.
plt.figure()
data.boxplot(column="target")

# Rescale the labels by 4 (so 4 becomes 1.0). Guarded so that re-running this
# cell does not divide an already rescaled column a second time.
if data['target'].max() > 1:
    data['target'] = data['target'] / 4

# Box plot of the label column after rescaling.
plt.figure()
data.boxplot(column="target")

In [None]:
# Grouped bar chart: positive vs. negative tweet counts for each hour.
fig, ax = plt.subplots(figsize=(15, 10))

x_axis = np.arange(24)
bar_width = 0.4
offset = 0.2  # shift each series half a bar width either side of the tick

pos = ax.bar(x_axis - offset, pos_tweets, width=bar_width, color='b', align="center")
neg = ax.bar(x_axis + offset, neg_tweets, width=bar_width, color='r', align="center")

# One tick per hour, labelled with the zero-padded hour strings.
ax.set_xticks(x_axis)
ax.set_xticklabels(hours)
ax.set_xlabel("hours")
ax.set_ylabel("number of tweets")
ax.legend((pos[0], neg[0]), ("positive tweets", "negative tweets"))

plt.show()

In [None]:
# Hard-coded per-epoch metrics for the run plotted as "10% of data with 10 epochs".
# Four parallel lists of 10 values each: 'loss', 'accuracy', 'val_loss', 'val_accuracy'.
# NOTE(review): looks like a pasted training-history dump - the producing code is not
# in this part of the notebook; confirm where these numbers came from.
# Observation: val_loss increases at every epoch (0.73 -> 1.07) while val_accuracy
# stays at about 0.50, unlike the other runs recorded here.
ep_10_data_10 = {'loss': [0.6590960025787354,
  0.5659002065658569,
  0.517277717590332,
  0.49403226375579834,
  0.4811204671859741,
  0.47261419892311096,
  0.4668677747249603,
  0.4610331952571869,
  0.4581807851791382,
  0.4550591707229614],
 'accuracy': [0.5253280997276306,
  0.6755156517028809,
  0.7309765815734863,
  0.7533593773841858,
  0.7645702958106995,
  0.7724375128746033,
  0.7773905992507935,
  0.7817343473434448,
  0.7844609618186951,
  0.785476565361023],
 'val_loss': [0.7283273339271545,
  0.8221195340156555,
  0.8973167538642883,
  0.9496384859085083,
  0.9853083491325378,
  1.0096911191940308,
  1.0275732278823853,
  1.0539478063583374,
  1.0547879934310913,
  1.0665644407272339],
 'val_accuracy': [0.5020624995231628,
  0.49906250834465027,
  0.4971249997615814,
  0.4984999895095825,
  0.4971874952316284,
  0.49787500500679016,
  0.4972499907016754,
  0.4976874887943268,
  0.49825000762939453,
  0.4989374876022339]}

# Hard-coded per-epoch metrics for the run plotted as "25% of data with 10 epochs".
# Four parallel lists of 10 values each: 'loss', 'accuracy', 'val_loss', 'val_accuracy'.
# NOTE(review): the code that produced these numbers is not in this part of the
# notebook; confirm their origin.
ep_10_data_25 = {'loss': [0.6173171401023865,
  0.513627290725708,
  0.4856550991535187,
  0.47431230545043945,
  0.468433141708374,
  0.4636176526546478,
  0.46068060398101807,
  0.4580445885658264,
  0.4559868276119232,
  0.45473334193229675],
 'accuracy': [0.5926874876022339,
  0.7339458465576172,
  0.761104166507721,
  0.7715416550636292,
  0.7765458226203918,
  0.7796791791915894,
  0.7819166779518127,
  0.7837291955947876,
  0.7853083610534668,
  0.7860958576202393],
 'val_loss': [0.5385001301765442,
  0.4927052855491638,
  0.47934219241142273,
  0.47441160678863525,
  0.4747435450553894,
  0.472409725189209,
  0.47944873571395874,
  0.4705291986465454,
  0.47034257650375366,
  0.4706830680370331],
 'val_accuracy': [0.7035750150680542,
  0.7652000188827515,
  0.7643874883651733,
  0.7757874727249146,
  0.7562124729156494,
  0.7840250134468079,
  0.7444000244140625,
  0.7707499861717224,
  0.7795000076293945,
  0.7733374834060669]}

# Hard-coded per-epoch metrics for the run plotted as "35% of data with 10 epochs".
# Four parallel lists of 10 values each: 'loss', 'accuracy', 'val_loss', 'val_accuracy'.
# NOTE(review): the code that produced these numbers is not in this part of the
# notebook; confirm their origin.
ep_10_data_35 = {'loss': [0.5914682149887085,
  0.49547553062438965,
  0.47668275237083435,
  0.4691717028617859,
  0.4644348919391632,
  0.4617554545402527,
  0.4599422812461853,
  0.4582114517688751,
  0.4576795995235443,
  0.45645034313201904],
 'accuracy': [0.6298690438270569,
  0.7523839473724365,
  0.7702767848968506,
  0.7773958444595337,
  0.7808035612106323,
  0.7823035717010498,
  0.784166693687439,
  0.7849583625793457,
  0.7855535745620728,
  0.7860148549079895],
 'val_loss': [0.5149147510528564,
  0.48951980471611023,
  0.4754367768764496,
  0.4721027910709381,
  0.48066356778144836,
  0.4696444869041443,
  0.4701799750328064,
  0.4696551561355591,
  0.47491297125816345,
  0.47079795598983765],
 'val_accuracy': [0.7486071586608887,
  0.7814196348190308,
  0.7810893058776855,
  0.784428596496582,
  0.791357159614563,
  0.7725625038146973,
  0.7691875100135803,
  0.7727500200271606,
  0.7905892729759216,
  0.7689642906188965]}

# Hard-coded per-epoch metrics for the run plotted as "10% of data with 15 epochs".
# Four parallel lists of 15 values each: 'loss', 'accuracy', 'val_loss', 'val_accuracy'.
# NOTE(review): the code that produced these numbers is not in this part of the
# notebook; confirm their origin.
ep_15_data_10 = {'loss': [0.6711179614067078,
  0.5903862714767456,
  0.5370938777923584,
  0.507082998752594,
  0.489980548620224,
  0.4779776930809021,
  0.4697960317134857,
  0.4636218249797821,
  0.45849254727363586,
  0.4542348086833954,
  0.4509323239326477,
  0.4471672475337982,
  0.4443146288394928,
  0.4422462582588196,
  0.43974530696868896],
 'accuracy': [0.5094062685966492,
  0.6370729207992554,
  0.7085416913032532,
  0.7394999861717224,
  0.7550937533378601,
  0.7660416960716248,
  0.7710520625114441,
  0.7787187695503235,
  0.7826145887374878,
  0.7845104336738586,
  0.7880416512489319,
  0.7893854379653931,
  0.792927086353302,
  0.7938437461853027,
  0.7964166402816772],
 'val_loss': [0.6287435293197632,
  0.5614779591560364,
  0.5256362557411194,
  0.5067121386528015,
  0.4960436224937439,
  0.49202218651771545,
  0.4870683550834656,
  0.4875309467315674,
  0.4834364056587219,
  0.4815909266471863,
  0.4850040376186371,
  0.4825076162815094,
  0.4812360405921936,
  0.4815331697463989,
  0.4818197190761566],
 'val_accuracy': [0.546875,
  0.6763749718666077,
  0.717843770980835,
  0.7356562614440918,
  0.7544062733650208,
  0.7734375,
  0.7712187767028809,
  0.7793437242507935,
  0.7592187523841858,
  0.7703437209129333,
  0.7511562705039978,
  0.7794374823570251,
  0.7750937342643738,
  0.7683749794960022,
  0.773812472820282]}

# Hard-coded per-epoch metrics for the run plotted as "25% of data with 15 epochs".
# Four parallel lists of 15 values each: 'loss', 'accuracy', 'val_loss', 'val_accuracy'.
# NOTE(review): the code that produced these numbers is not in this part of the
# notebook; confirm their origin.
ep_15_data_25 = {'loss': [0.6176750063896179,
  0.5139123201370239,
  0.4864250719547272,
  0.4748062193393707,
  0.46861639618873596,
  0.46404802799224854,
  0.46056121587753296,
  0.4582570493221283,
  0.45709219574928284,
  0.45517751574516296,
  0.4540956914424896,
  0.45311257243156433,
  0.45252951979637146,
  0.45183658599853516,
  0.45114651322364807],
 'accuracy': [0.590541660785675,
  0.7331791520118713,
  0.7600208520889282,
  0.7707666754722595,
  0.7766125202178955,
  0.7796375155448914,
  0.7824249863624573,
  0.783887505531311,
  0.7849125266075134,
  0.7868833541870117,
  0.7878416776657104,
  0.7875208258628845,
  0.7886124849319458,
  0.7894583344459534,
  0.7893333435058594],
 'val_loss': [0.5415642857551575,
  0.492264062166214,
  0.47914907336235046,
  0.47242051362991333,
  0.4714793562889099,
  0.46836167573928833,
  0.46786099672317505,
  0.46845096349716187,
  0.46756064891815186,
  0.4700506925582886,
  0.47289565205574036,
  0.46841683983802795,
  0.46988070011138916,
  0.4695386588573456,
  0.4697989225387573],
 'val_accuracy': [0.6818374991416931,
  0.7663624882698059,
  0.777162492275238,
  0.7710250020027161,
  0.785812497138977,
  0.7813624739646912,
  0.7828249931335449,
  0.7699124813079834,
  0.7827374935150146,
  0.7640374898910522,
  0.7892374992370605,
  0.7750375270843506,
  0.7684375047683716,
  0.784375011920929,
  0.7723374962806702]}

# Loss curves for each recorded run, one panel per run on a 2x3 grid.
# Each entry pairs a metrics dict with the title of its panel; the panels are
# filled in list order (grid positions 1..5).
loss_runs = [
    (ep_10_data_10, "10% of data with 10 epochs"),
    (ep_10_data_25, "25% of data with 10 epochs"),
    (ep_10_data_35, "35% of data with 10 epochs"),
    (ep_15_data_10, "10% of data with 15 epochs"),
    (ep_15_data_25, "25% of data with 15 epochs"),
]

fig = plt.figure()
fig.set_figwidth(18)
fig.set_figheight(12)

# The pyplot state machine is used on purpose so that the figure stays
# "current" for any plotting statements that follow this block.
for panel_index, (run_history, panel_title) in enumerate(loss_runs, start=1):
    plt.subplot(2, 3, panel_index)
    # Label each curve with its dict key so the two lines can be told apart.
    for metric_key in ('loss', 'val_loss'):
        plt.plot(run_history[metric_key], label=metric_key)
    plt.title(panel_title)
    plt.xlabel("epochs")
    plt.ylabel("loss")
    plt.legend()