In [1]:
from pathlib import Path

import pandas as pd

### Get Articles from GDELT

In [3]:
from gdeltdoc import GdeltDoc, Filters, near, repeat

In [4]:
def get_articles(start_date, end_date, keyword, domain, country, near_n_words, near_keywords, num_records=250):
    """Run one GDELT article search and return its result.

    Parameters
    ----------
    start_date, end_date : str
        Bounds of the search window, forwarded unchanged to ``Filters``.
        The notebook passes ``"YYYY-MM-DD"`` strings.
    keyword, domain, country :
        Forwarded unchanged to ``Filters``; ``None`` leaves a filter unset.
    near_n_words : int
        Distance argument handed to ``near`` together with ``near_keywords``.
    near_keywords : sequence of str, or None
        Words for the proximity clause. ``None`` or an empty sequence skips
        the clause entirely instead of failing on argument unpacking.
    num_records : int, default 250
        Forwarded to ``Filters`` as the per-request record limit.

    Returns
    -------
    The value returned by ``GdeltDoc.article_search`` (the rest of the
    notebook uses it as a pandas DataFrame).
    """
    # Build the proximity clause only when there are words to put in it;
    # unpacking None would raise TypeError before the request is even made.
    near_filter = near(near_n_words, *near_keywords) if near_keywords else None

    search_filters = Filters(
        start_date=start_date,
        end_date=end_date,
        num_records=num_records,
        keyword=keyword,
        domain=domain,
        country=country,
        near=near_filter,
    )

    return GdeltDoc().article_search(search_filters)

In [4]:
# Collection period; `dates` holds every calendar day in it (both ends
# included) as "YYYY-MM-DD" strings.
start = "2017-01-01"
end = "2023-07-01"

dates = pd.date_range(start=start, end=end, freq="D").strftime("%Y-%m-%d").tolist()

In [5]:
# Walk the period one window at a time: every consecutive pair in `dates`
# is sent to get_articles as (start_date, end_date).
RECORDS_PER_WINDOW = 250  # same value as get_articles' default num_records

article_batches = []   # non-empty result frames, concatenated once at the end
failed_windows = []    # (start_date, end_date, error text) for windows that raised
capped_windows = []    # windows that came back with exactly RECORDS_PER_WINDOW rows
total_articles = 0
total_english = 0

try:
    for window_no, (start_date, end_date) in enumerate(zip(dates, dates[1:]), start=1):
        print(f"{window_no}- {start_date} - {end_date}", end=" | ")

        try:
            articles = get_articles(start_date=start_date,
                                    end_date=end_date,
                                    keyword="ukraine",
                                    domain=None,
                                    country=None,
                                    near_n_words=100,
                                    near_keywords=["russia", "nato"],
                                    num_records=RECORDS_PER_WINDOW)
        except Exception as e:
            # Best-effort collection: one bad window must not stop the run,
            # but remember it so it can be retried instead of silently lost.
            failed_windows.append((start_date, end_date, str(e)))
            print(f"Error:\n{e}")
            continue

        if not articles.empty:
            article_batches.append(articles)
        total_articles += articles.shape[0]

        # A result without rows may also lack the "language" column.
        if "language" in articles.columns:
            total_english += int((articles["language"] == "English").sum())

        # Hitting the limit exactly suggests the window was truncated.
        if articles.shape[0] >= RECORDS_PER_WINDOW:
            capped_windows.append((start_date, end_date))

        print(f"Num Articles Collected: {articles.shape[0]}", end = " | ")
        print(f"# of Articles: {total_articles}", end = " | ")
        print(f"# of Articles in English: {total_english}")
finally:
    # Concatenate once (growing a frame inside the loop copies it every
    # iteration) and do it even if the run is interrupted part-way.
    articles_df = (pd.concat(article_batches, ignore_index=True)
                   if article_batches else pd.DataFrame())

print(f"Failed windows: {len(failed_windows)} | Windows at the {RECORDS_PER_WINDOW}-record limit: {len(capped_windows)}")

1- 2017-01-01 - 2017-01-02 | Num Articles Collected: 207 | # of Articles: 207 | # of Articles in English: 123
2- 2017-01-02 - 2017-01-03 | Num Articles Collected: 250 | # of Articles: 457 | # of Articles in English: 276
3- 2017-01-03 - 2017-01-04 | Num Articles Collected: 250 | # of Articles: 707 | # of Articles in English: 457
4- 2017-01-04 - 2017-01-05 | Num Articles Collected: 217 | # of Articles: 924 | # of Articles in English: 596
5- 2017-01-05 - 2017-01-06 | Num Articles Collected: 250 | # of Articles: 1174 | # of Articles in English: 753
6- 2017-01-06 - 2017-01-07 | Num Articles Collected: 250 | # of Articles: 1424 | # of Articles in English: 873
7- 2017-01-07 - 2017-01-08 | Num Articles Collected: 215 | # of Articles: 1639 | # of Articles in English: 968
8- 2017-01-08 - 2017-01-09 | Num Articles Collected: 227 | # of Articles: 1866 | # of Articles in English: 1088
9- 2017-01-09 - 2017-01-10 | Num Articles Collected: 250 | # of Articles: 2116 | # of Articles in English: 1241
10-

74- 2017-03-15 - 2017-03-16 | Num Articles Collected: 250 | # of Articles: 17481 | # of Articles in English: 10872
75- 2017-03-16 - 2017-03-17 | Num Articles Collected: 250 | # of Articles: 17731 | # of Articles in English: 11053
76- 2017-03-17 - 2017-03-18 | Num Articles Collected: 250 | # of Articles: 17981 | # of Articles in English: 11144
77- 2017-03-18 - 2017-03-19 | Num Articles Collected: 250 | # of Articles: 18231 | # of Articles in English: 11246
78- 2017-03-19 - 2017-03-20 | Num Articles Collected: 119 | # of Articles: 18350 | # of Articles in English: 11296
79- 2017-03-20 - 2017-03-21 | Num Articles Collected: 250 | # of Articles: 18600 | # of Articles in English: 11483
80- 2017-03-21 - 2017-03-22 | Num Articles Collected: 250 | # of Articles: 18850 | # of Articles in English: 11688
81- 2017-03-22 - 2017-03-23 | Num Articles Collected: 250 | # of Articles: 19100 | # of Articles in English: 11895
82- 2017-03-23 - 2017-03-24 | Num Articles Collected: 250 | # of Articles: 19350

216- 2017-08-04 - 2017-08-05 | Num Articles Collected: 240 | # of Articles: 44174 | # of Articles in English: 27481
217- 2017-08-05 - 2017-08-06 | Num Articles Collected: 96 | # of Articles: 44270 | # of Articles in English: 27542
218- 2017-08-06 - 2017-08-07 | Num Articles Collected: 250 | # of Articles: 44520 | # of Articles in English: 27766
219- 2017-08-07 - 2017-08-08 | Num Articles Collected: 250 | # of Articles: 44770 | # of Articles in English: 27931
220- 2017-08-08 - 2017-08-09 | Num Articles Collected: 130 | # of Articles: 44900 | # of Articles in English: 27988
221- 2017-08-09 - 2017-08-10 | Num Articles Collected: 109 | # of Articles: 45009 | # of Articles in English: 28067
222- 2017-08-10 - 2017-08-11 | Num Articles Collected: 94 | # of Articles: 45103 | # of Articles in English: 28115
223- 2017-08-11 - 2017-08-12 | Num Articles Collected: 89 | # of Articles: 45192 | # of Articles in English: 28151
224- 2017-08-12 - 2017-08-13 | Num Articles Collected: 43 | # of Articles: 

358- 2017-12-24 - 2017-12-25 | Num Articles Collected: 193 | # of Articles: 64528 | # of Articles in English: 38926
359- 2017-12-25 - 2017-12-26 | Num Articles Collected: 75 | # of Articles: 64603 | # of Articles in English: 38961
360- 2017-12-26 - 2017-12-27 | Num Articles Collected: 156 | # of Articles: 64759 | # of Articles in English: 39060
361- 2017-12-27 - 2017-12-28 | Num Articles Collected: 109 | # of Articles: 64868 | # of Articles in English: 39121
362- 2017-12-28 - 2017-12-29 | Num Articles Collected: 204 | # of Articles: 65072 | # of Articles in English: 39233
363- 2017-12-29 - 2017-12-30 | Num Articles Collected: 198 | # of Articles: 65270 | # of Articles in English: 39325
364- 2017-12-30 - 2017-12-31 | Num Articles Collected: 74 | # of Articles: 65344 | # of Articles in English: 39370
365- 2017-12-31 - 2018-01-01 | Num Articles Collected: 71 | # of Articles: 65415 | # of Articles in English: 39415
366- 2018-01-01 - 2018-01-02 | Num Articles Collected: 69 | # of Articles: 

429- 2018-03-05 - 2018-03-06 | Num Articles Collected: 166 | # of Articles: 74447 | # of Articles in English: 45139
430- 2018-03-06 - 2018-03-07 | Num Articles Collected: 147 | # of Articles: 74594 | # of Articles in English: 45243
431- 2018-03-07 - 2018-03-08 | Num Articles Collected: 124 | # of Articles: 74718 | # of Articles in English: 45323
432- 2018-03-08 - 2018-03-09 | Num Articles Collected: 132 | # of Articles: 74850 | # of Articles in English: 45404
433- 2018-03-09 - 2018-03-10 | Num Articles Collected: 120 | # of Articles: 74970 | # of Articles in English: 45473
434- 2018-03-10 - 2018-03-11 | Num Articles Collected: 69 | # of Articles: 75039 | # of Articles in English: 45513
435- 2018-03-11 - 2018-03-12 | Num Articles Collected: 66 | # of Articles: 75105 | # of Articles in English: 45545
436- 2018-03-12 - 2018-03-13 | Num Articles Collected: 131 | # of Articles: 75236 | # of Articles in English: 45622
437- 2018-03-13 - 2018-03-14 | Num Articles Collected: 246 | # of Articles

571- 2018-07-25 - 2018-07-26 | Num Articles Collected: 250 | # of Articles: 96651 | # of Articles in English: 58078
572- 2018-07-26 - 2018-07-27 | Num Articles Collected: 250 | # of Articles: 96901 | # of Articles in English: 58215
573- 2018-07-27 - 2018-07-28 | Num Articles Collected: 250 | # of Articles: 97151 | # of Articles in English: 58340
574- 2018-07-28 - 2018-07-29 | Num Articles Collected: 130 | # of Articles: 97281 | # of Articles in English: 58397
575- 2018-07-29 - 2018-07-30 | Num Articles Collected: 69 | # of Articles: 97350 | # of Articles in English: 58434
576- 2018-07-30 - 2018-07-31 | Num Articles Collected: 250 | # of Articles: 97600 | # of Articles in English: 58560
577- 2018-07-31 - 2018-08-01 | Num Articles Collected: 250 | # of Articles: 97850 | # of Articles in English: 58666
578- 2018-08-01 - 2018-08-02 | Num Articles Collected: 250 | # of Articles: 98100 | # of Articles in English: 58811
579- 2018-08-02 - 2018-08-03 | Num Articles Collected: 250 | # of Article

642- 2018-10-04 - 2018-10-05 | Num Articles Collected: 250 | # of Articles: 110697 | # of Articles in English: 63876
643- 2018-10-05 - 2018-10-06 | Num Articles Collected: 250 | # of Articles: 110947 | # of Articles in English: 64044
644- 2018-10-06 - 2018-10-07 | Num Articles Collected: 116 | # of Articles: 111063 | # of Articles in English: 64074
645- 2018-10-07 - 2018-10-08 | Num Articles Collected: 126 | # of Articles: 111189 | # of Articles in English: 64124
646- 2018-10-08 - 2018-10-09 | Num Articles Collected: 166 | # of Articles: 111355 | # of Articles in English: 64165
647- 2018-10-09 - 2018-10-10 | Num Articles Collected: 163 | # of Articles: 111518 | # of Articles in English: 64211
648- 2018-10-10 - 2018-10-11 | Num Articles Collected: 220 | # of Articles: 111738 | # of Articles in English: 64281
649- 2018-10-11 - 2018-10-12 | Num Articles Collected: 250 | # of Articles: 111988 | # of Articles in English: 64332
650- 2018-10-12 - 2018-10-13 | Num Articles Collected: 250 | # o

713- 2018-12-14 - 2018-12-15 | Num Articles Collected: 250 | # of Articles: 124432 | # of Articles in English: 68081
714- 2018-12-15 - 2018-12-16 | Num Articles Collected: 87 | # of Articles: 124519 | # of Articles in English: 68117
715- 2018-12-16 - 2018-12-17 | Num Articles Collected: 69 | # of Articles: 124588 | # of Articles in English: 68141
716- 2018-12-17 - 2018-12-18 | Num Articles Collected: 250 | # of Articles: 124838 | # of Articles in English: 68235
717- 2018-12-18 - 2018-12-19 | Num Articles Collected: 250 | # of Articles: 125088 | # of Articles in English: 68285
718- 2018-12-19 - 2018-12-20 | Error:
Max Recursion depth is reached. JSON can´t be parsed!
719- 2018-12-20 - 2018-12-21 | Num Articles Collected: 250 | # of Articles: 125338 | # of Articles in English: 68343
720- 2018-12-21 - 2018-12-22 | Num Articles Collected: 250 | # of Articles: 125588 | # of Articles in English: 68450
721- 2018-12-22 - 2018-12-23 | Num Articles Collected: 132 | # of Articles: 125720 | # of A

784- 2019-02-23 - 2019-02-24 | Num Articles Collected: 187 | # of Articles: 137393 | # of Articles in English: 71380
785- 2019-02-24 - 2019-02-25 | Num Articles Collected: 190 | # of Articles: 137583 | # of Articles in English: 71426
786- 2019-02-25 - 2019-02-26 | Num Articles Collected: 199 | # of Articles: 137782 | # of Articles in English: 71450
787- 2019-02-26 - 2019-02-27 | Num Articles Collected: 219 | # of Articles: 138001 | # of Articles in English: 71489
788- 2019-02-27 - 2019-02-28 | Num Articles Collected: 250 | # of Articles: 138251 | # of Articles in English: 71519
789- 2019-02-28 - 2019-03-01 | Num Articles Collected: 231 | # of Articles: 138482 | # of Articles in English: 71590
790- 2019-03-01 - 2019-03-02 | Num Articles Collected: 245 | # of Articles: 138727 | # of Articles in English: 71647
791- 2019-03-02 - 2019-03-03 | Num Articles Collected: 127 | # of Articles: 138854 | # of Articles in English: 71664
792- 2019-03-03 - 2019-03-04 | Num Articles Collected: 74 | # of

925- 2019-07-14 - 2019-07-15 | Num Articles Collected: 67 | # of Articles: 162540 | # of Articles in English: 79591
926- 2019-07-15 - 2019-07-16 | Num Articles Collected: 250 | # of Articles: 162790 | # of Articles in English: 79715
927- 2019-07-16 - 2019-07-17 | Num Articles Collected: 178 | # of Articles: 162968 | # of Articles in English: 79763
928- 2019-07-17 - 2019-07-18 | Num Articles Collected: 198 | # of Articles: 163166 | # of Articles in English: 79845
929- 2019-07-18 - 2019-07-19 | Num Articles Collected: 241 | # of Articles: 163407 | # of Articles in English: 79900
930- 2019-07-19 - 2019-07-20 | Num Articles Collected: 201 | # of Articles: 163608 | # of Articles in English: 79933
931- 2019-07-20 - 2019-07-21 | Num Articles Collected: 78 | # of Articles: 163686 | # of Articles in English: 79945
932- 2019-07-21 - 2019-07-22 | Num Articles Collected: 213 | # of Articles: 163899 | # of Articles in English: 80109
933- 2019-07-22 - 2019-07-23 | Num Articles Collected: 183 | # of 

1065- 2019-12-01 - 2019-12-02 | Num Articles Collected: 185 | # of Articles: 184260 | # of Articles in English: 89345
1066- 2019-12-02 - 2019-12-03 | Num Articles Collected: 250 | # of Articles: 184510 | # of Articles in English: 89489
1067- 2019-12-03 - 2019-12-04 | Num Articles Collected: 250 | # of Articles: 184760 | # of Articles in English: 89510
1068- 2019-12-04 - 2019-12-05 | Num Articles Collected: 250 | # of Articles: 185010 | # of Articles in English: 89567
1069- 2019-12-05 - 2019-12-06 | Num Articles Collected: 250 | # of Articles: 185260 | # of Articles in English: 89630
1070- 2019-12-06 - 2019-12-07 | Num Articles Collected: 250 | # of Articles: 185510 | # of Articles in English: 89724
1071- 2019-12-07 - 2019-12-08 | Num Articles Collected: 238 | # of Articles: 185748 | # of Articles in English: 89889
1072- 2019-12-08 - 2019-12-09 | Num Articles Collected: 250 | # of Articles: 185998 | # of Articles in English: 90031
1073- 2019-12-09 - 2019-12-10 | Num Articles Collected: 

1135- 2020-02-09 - 2020-02-10 | Num Articles Collected: 42 | # of Articles: 193917 | # of Articles in English: 93700
1136- 2020-02-10 - 2020-02-11 | Num Articles Collected: 149 | # of Articles: 194066 | # of Articles in English: 93729
1137- 2020-02-11 - 2020-02-12 | Num Articles Collected: 160 | # of Articles: 194226 | # of Articles in English: 93760
1138- 2020-02-12 - 2020-02-13 | Num Articles Collected: 107 | # of Articles: 194333 | # of Articles in English: 93779
1139- 2020-02-13 - 2020-02-14 | Num Articles Collected: 152 | # of Articles: 194485 | # of Articles in English: 93836
1140- 2020-02-14 - 2020-02-15 | Num Articles Collected: 250 | # of Articles: 194735 | # of Articles in English: 93892
1141- 2020-02-15 - 2020-02-16 | Num Articles Collected: 249 | # of Articles: 194984 | # of Articles in English: 93963
1142- 2020-02-16 - 2020-02-17 | Num Articles Collected: 93 | # of Articles: 195077 | # of Articles in English: 93991
1143- 2020-02-17 - 2020-02-18 | Num Articles Collected: 21

1205- 2020-04-19 - 2020-04-20 | Num Articles Collected: 12 | # of Articles: 199503 | # of Articles in English: 95809
1206- 2020-04-20 - 2020-04-21 | Num Articles Collected: 24 | # of Articles: 199527 | # of Articles in English: 95819
1207- 2020-04-21 - 2020-04-22 | Num Articles Collected: 56 | # of Articles: 199583 | # of Articles in English: 95835
1208- 2020-04-22 - 2020-04-23 | Num Articles Collected: 36 | # of Articles: 199619 | # of Articles in English: 95845
1209- 2020-04-23 - 2020-04-24 | Num Articles Collected: 25 | # of Articles: 199644 | # of Articles in English: 95850
1210- 2020-04-24 - 2020-04-25 | Num Articles Collected: 38 | # of Articles: 199682 | # of Articles in English: 95866
1211- 2020-04-25 - 2020-04-26 | Num Articles Collected: 62 | # of Articles: 199744 | # of Articles in English: 95892
1212- 2020-04-26 - 2020-04-27 | Num Articles Collected: 14 | # of Articles: 199758 | # of Articles in English: 95896
1213- 2020-04-27 - 2020-04-28 | Num Articles Collected: 58 | # o

1275- 2020-06-28 - 2020-06-29 | Num Articles Collected: 52 | # of Articles: 204647 | # of Articles in English: 97669
1276- 2020-06-29 - 2020-06-30 | Num Articles Collected: 101 | # of Articles: 204748 | # of Articles in English: 97704
1277- 2020-06-30 - 2020-07-01 | Num Articles Collected: 175 | # of Articles: 204923 | # of Articles in English: 97787
1278- 2020-07-01 - 2020-07-02 | Num Articles Collected: 224 | # of Articles: 205147 | # of Articles in English: 97909
1279- 2020-07-02 - 2020-07-03 | Num Articles Collected: 113 | # of Articles: 205260 | # of Articles in English: 97962
1280- 2020-07-03 - 2020-07-04 | Num Articles Collected: 116 | # of Articles: 205376 | # of Articles in English: 98016
1281- 2020-07-04 - 2020-07-05 | Num Articles Collected: 26 | # of Articles: 205402 | # of Articles in English: 98029
1282- 2020-07-05 - 2020-07-06 | Num Articles Collected: 40 | # of Articles: 205442 | # of Articles in English: 98053
1283- 2020-07-06 - 2020-07-07 | Num Articles Collected: 67 

1345- 2020-09-06 - 2020-09-07 | Num Articles Collected: 75 | # of Articles: 212598 | # of Articles in English: 101025
1346- 2020-09-07 - 2020-09-08 | Num Articles Collected: 130 | # of Articles: 212728 | # of Articles in English: 101047
1347- 2020-09-08 - 2020-09-09 | Num Articles Collected: 161 | # of Articles: 212889 | # of Articles in English: 101109
1348- 2020-09-09 - 2020-09-10 | Num Articles Collected: 250 | # of Articles: 213139 | # of Articles in English: 101201
1349- 2020-09-10 - 2020-09-11 | Num Articles Collected: 120 | # of Articles: 213259 | # of Articles in English: 101227
1350- 2020-09-11 - 2020-09-12 | Num Articles Collected: 104 | # of Articles: 213363 | # of Articles in English: 101269
1351- 2020-09-12 - 2020-09-13 | Num Articles Collected: 44 | # of Articles: 213407 | # of Articles in English: 101278
1352- 2020-09-13 - 2020-09-14 | Num Articles Collected: 42 | # of Articles: 213449 | # of Articles in English: 101295
1353- 2020-09-14 - 2020-09-15 | Num Articles Collec

1415- 2020-11-15 - 2020-11-16 | Num Articles Collected: 3 | # of Articles: 217430 | # of Articles in English: 102693
1416- 2020-11-16 - 2020-11-17 | Num Articles Collected: 39 | # of Articles: 217469 | # of Articles in English: 102708
1417- 2020-11-17 - 2020-11-18 | Num Articles Collected: 37 | # of Articles: 217506 | # of Articles in English: 102720
1418- 2020-11-18 - 2020-11-19 | Num Articles Collected: 79 | # of Articles: 217585 | # of Articles in English: 102734
1419- 2020-11-19 - 2020-11-20 | Num Articles Collected: 101 | # of Articles: 217686 | # of Articles in English: 102750
1420- 2020-11-20 - 2020-11-21 | Num Articles Collected: 80 | # of Articles: 217766 | # of Articles in English: 102774
1421- 2020-11-21 - 2020-11-22 | Num Articles Collected: 51 | # of Articles: 217817 | # of Articles in English: 102782
1422- 2020-11-22 - 2020-11-23 | Num Articles Collected: 24 | # of Articles: 217841 | # of Articles in English: 102794
1423- 2020-11-23 - 2020-11-24 | Num Articles Collected: 

1485- 2021-01-24 - 2021-01-25 | Num Articles Collected: 34 | # of Articles: 222501 | # of Articles in English: 104075
1486- 2021-01-25 - 2021-01-26 | Num Articles Collected: 46 | # of Articles: 222547 | # of Articles in English: 104089
1487- 2021-01-26 - 2021-01-27 | Num Articles Collected: 250 | # of Articles: 222797 | # of Articles in English: 104263
1488- 2021-01-27 - 2021-01-28 | Num Articles Collected: 250 | # of Articles: 223047 | # of Articles in English: 104390
1489- 2021-01-28 - 2021-01-29 | Num Articles Collected: 148 | # of Articles: 223195 | # of Articles in English: 104420
1490- 2021-01-29 - 2021-01-30 | Num Articles Collected: 104 | # of Articles: 223299 | # of Articles in English: 104437
1491- 2021-01-30 - 2021-01-31 | Num Articles Collected: 67 | # of Articles: 223366 | # of Articles in English: 104476
1492- 2021-01-31 - 2021-02-01 | Num Articles Collected: 26 | # of Articles: 223392 | # of Articles in English: 104489
1493- 2021-02-01 - 2021-02-02 | Num Articles Collect

1623- 2021-06-11 - 2021-06-12 | Num Articles Collected: 250 | # of Articles: 244786 | # of Articles in English: 110725
1624- 2021-06-12 - 2021-06-13 | Num Articles Collected: 219 | # of Articles: 245005 | # of Articles in English: 110819
1625- 2021-06-13 - 2021-06-14 | Num Articles Collected: 250 | # of Articles: 245255 | # of Articles in English: 110979
1626- 2021-06-14 - 2021-06-15 | Num Articles Collected: 250 | # of Articles: 245505 | # of Articles in English: 111034
1627- 2021-06-15 - 2021-06-16 | Num Articles Collected: 250 | # of Articles: 245755 | # of Articles in English: 111069
1628- 2021-06-16 - 2021-06-17 | Num Articles Collected: 250 | # of Articles: 246005 | # of Articles in English: 111128
1629- 2021-06-17 - 2021-06-18 | Num Articles Collected: 250 | # of Articles: 246255 | # of Articles in English: 111182
1630- 2021-06-18 - 2021-06-19 | Num Articles Collected: 250 | # of Articles: 246505 | # of Articles in English: 111249
1631- 2021-06-19 - 2021-06-20 | Num Articles Col

1761- 2021-10-27 - 2021-10-28 | Num Articles Collected: 150 | # of Articles: 264541 | # of Articles in English: 116551
1762- 2021-10-28 - 2021-10-29 | Num Articles Collected: 92 | # of Articles: 264633 | # of Articles in English: 116573
1763- 2021-10-29 - 2021-10-30 | Num Articles Collected: 91 | # of Articles: 264724 | # of Articles in English: 116596
1764- 2021-10-30 - 2021-10-31 | Num Articles Collected: 44 | # of Articles: 264768 | # of Articles in English: 116607
1765- 2021-10-31 - 2021-11-01 | Num Articles Collected: 113 | # of Articles: 264881 | # of Articles in English: 116642
1766- 2021-11-01 - 2021-11-02 | Num Articles Collected: 165 | # of Articles: 265046 | # of Articles in English: 116708
1767- 2021-11-02 - 2021-11-03 | Num Articles Collected: 224 | # of Articles: 265270 | # of Articles in English: 116773
1768- 2021-11-03 - 2021-11-04 | Num Articles Collected: 140 | # of Articles: 265410 | # of Articles in English: 116814
1769- 2021-11-04 - 2021-11-05 | Num Articles Collec

1830- 2022-01-04 - 2022-01-05 | Num Articles Collected: 250 | # of Articles: 280044 | # of Articles in English: 121298
1831- 2022-01-05 - 2022-01-06 | Num Articles Collected: 250 | # of Articles: 280294 | # of Articles in English: 121379
1832- 2022-01-06 - 2022-01-07 | Num Articles Collected: 250 | # of Articles: 280544 | # of Articles in English: 121487
1833- 2022-01-07 - 2022-01-08 | Num Articles Collected: 250 | # of Articles: 280794 | # of Articles in English: 121590
1834- 2022-01-08 - 2022-01-09 | Num Articles Collected: 250 | # of Articles: 281044 | # of Articles in English: 121704
1835- 2022-01-09 - 2022-01-10 | Num Articles Collected: 250 | # of Articles: 281294 | # of Articles in English: 121822
1836- 2022-01-10 - 2022-01-11 | Num Articles Collected: 250 | # of Articles: 281544 | # of Articles in English: 121908
1837- 2022-01-11 - 2022-01-12 | Num Articles Collected: 250 | # of Articles: 281794 | # of Articles in English: 121980
1838- 2022-01-12 - 2022-01-13 | Num Articles Col

1899- 2022-03-14 - 2022-03-15 | Num Articles Collected: 250 | # of Articles: 297294 | # of Articles in English: 125767
1900- 2022-03-15 - 2022-03-16 | Num Articles Collected: 250 | # of Articles: 297544 | # of Articles in English: 125893
1901- 2022-03-16 - 2022-03-17 | Num Articles Collected: 250 | # of Articles: 297794 | # of Articles in English: 125982
1902- 2022-03-17 - 2022-03-18 | Num Articles Collected: 250 | # of Articles: 298044 | # of Articles in English: 126061
1903- 2022-03-18 - 2022-03-19 | Num Articles Collected: 250 | # of Articles: 298294 | # of Articles in English: 126153
1904- 2022-03-19 - 2022-03-20 | Num Articles Collected: 250 | # of Articles: 298544 | # of Articles in English: 126238
1905- 2022-03-20 - 2022-03-21 | Num Articles Collected: 250 | # of Articles: 298794 | # of Articles in English: 126351
1906- 2022-03-21 - 2022-03-22 | Num Articles Collected: 250 | # of Articles: 299044 | # of Articles in English: 126485
1907- 2022-03-22 - 2022-03-23 | Num Articles Col

2038- 2022-07-31 - 2022-08-01 | Num Articles Collected: 250 | # of Articles: 331294 | # of Articles in English: 135864
2039- 2022-08-01 - 2022-08-02 | Num Articles Collected: 250 | # of Articles: 331544 | # of Articles in English: 135949
2040- 2022-08-02 - 2022-08-03 | Num Articles Collected: 250 | # of Articles: 331794 | # of Articles in English: 136014
2041- 2022-08-03 - 2022-08-04 | Num Articles Collected: 250 | # of Articles: 332044 | # of Articles in English: 136130
2042- 2022-08-04 - 2022-08-05 | Num Articles Collected: 250 | # of Articles: 332294 | # of Articles in English: 136187
2043- 2022-08-05 - 2022-08-06 | Num Articles Collected: 250 | # of Articles: 332544 | # of Articles in English: 136300
2044- 2022-08-06 - 2022-08-07 | Num Articles Collected: 250 | # of Articles: 332794 | # of Articles in English: 136401
2045- 2022-08-07 - 2022-08-08 | Num Articles Collected: 250 | # of Articles: 333044 | # of Articles in English: 136471
2046- 2022-08-08 - 2022-08-09 | Num Articles Col

2107- 2022-10-08 - 2022-10-09 | Num Articles Collected: 250 | # of Articles: 348544 | # of Articles in English: 140790
2108- 2022-10-09 - 2022-10-10 | Num Articles Collected: 250 | # of Articles: 348794 | # of Articles in English: 140867
2109- 2022-10-10 - 2022-10-11 | Num Articles Collected: 250 | # of Articles: 349044 | # of Articles in English: 140920
2110- 2022-10-11 - 2022-10-12 | Num Articles Collected: 250 | # of Articles: 349294 | # of Articles in English: 140956
2111- 2022-10-12 - 2022-10-13 | Num Articles Collected: 250 | # of Articles: 349544 | # of Articles in English: 141003
2112- 2022-10-13 - 2022-10-14 | Num Articles Collected: 250 | # of Articles: 349794 | # of Articles in English: 141078
2113- 2022-10-14 - 2022-10-15 | Num Articles Collected: 250 | # of Articles: 350044 | # of Articles in English: 141132
2114- 2022-10-15 - 2022-10-16 | Num Articles Collected: 250 | # of Articles: 350294 | # of Articles in English: 141197
2115- 2022-10-16 - 2022-10-17 | Num Articles Col

2177- 2022-12-17 - 2022-12-18 | Num Articles Collected: 250 | # of Articles: 365388 | # of Articles in English: 144759
2178- 2022-12-18 - 2022-12-19 | Num Articles Collected: 250 | # of Articles: 365638 | # of Articles in English: 144832
2179- 2022-12-19 - 2022-12-20 | Num Articles Collected: 250 | # of Articles: 365888 | # of Articles in English: 144914
2180- 2022-12-20 - 2022-12-21 | Num Articles Collected: 250 | # of Articles: 366138 | # of Articles in English: 144969
2181- 2022-12-21 - 2022-12-22 | Num Articles Collected: 250 | # of Articles: 366388 | # of Articles in English: 145028
2182- 2022-12-22 - 2022-12-23 | Num Articles Collected: 250 | # of Articles: 366638 | # of Articles in English: 145065
2183- 2022-12-23 - 2022-12-24 | Num Articles Collected: 250 | # of Articles: 366888 | # of Articles in English: 145105
2184- 2022-12-24 - 2022-12-25 | Num Articles Collected: 250 | # of Articles: 367138 | # of Articles in English: 145168
2185- 2022-12-25 - 2022-12-26 | Num Articles Col

2246- 2023-02-24 - 2023-02-25 | Num Articles Collected: 250 | # of Articles: 382638 | # of Articles in English: 148510
2247- 2023-02-25 - 2023-02-26 | Num Articles Collected: 250 | # of Articles: 382888 | # of Articles in English: 148594
2248- 2023-02-26 - 2023-02-27 | Num Articles Collected: 250 | # of Articles: 383138 | # of Articles in English: 148678
2249- 2023-02-27 - 2023-02-28 | Num Articles Collected: 250 | # of Articles: 383388 | # of Articles in English: 148756
2250- 2023-02-28 - 2023-03-01 | Num Articles Collected: 250 | # of Articles: 383638 | # of Articles in English: 148791
2251- 2023-03-01 - 2023-03-02 | Num Articles Collected: 250 | # of Articles: 383888 | # of Articles in English: 148829
2252- 2023-03-02 - 2023-03-03 | Num Articles Collected: 250 | # of Articles: 384138 | # of Articles in English: 148884
2253- 2023-03-03 - 2023-03-04 | Num Articles Collected: 250 | # of Articles: 384388 | # of Articles in English: 148951
2254- 2023-03-04 - 2023-03-05 | Num Articles Col

2315- 2023-05-04 - 2023-05-05 | Num Articles Collected: 250 | # of Articles: 399638 | # of Articles in English: 153040
2316- 2023-05-05 - 2023-05-06 | Num Articles Collected: 250 | # of Articles: 399888 | # of Articles in English: 153108
2317- 2023-05-06 - 2023-05-07 | Num Articles Collected: 250 | # of Articles: 400138 | # of Articles in English: 153195
2318- 2023-05-07 - 2023-05-08 | Num Articles Collected: 250 | # of Articles: 400388 | # of Articles in English: 153273
2319- 2023-05-08 - 2023-05-09 | Num Articles Collected: 250 | # of Articles: 400638 | # of Articles in English: 153328
2320- 2023-05-09 - 2023-05-10 | Num Articles Collected: 250 | # of Articles: 400888 | # of Articles in English: 153408
2321- 2023-05-10 - 2023-05-11 | Num Articles Collected: 250 | # of Articles: 401138 | # of Articles in English: 153453
2322- 2023-05-11 - 2023-05-12 | Num Articles Collected: 250 | # of Articles: 401388 | # of Articles in English: 153505
2323- 2023-05-12 - 2023-05-13 | Num Articles Col

IndexError: list index out of range

In [6]:
# Overview of the collected frame: columns, non-null counts, dtypes and memory use.
articles_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 413888 entries, 0 to 413887
Data columns (total 8 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   url            413888 non-null  object
 1   url_mobile     413888 non-null  object
 2   title          413888 non-null  object
 3   seendate       413888 non-null  object
 4   socialimage    413888 non-null  object
 5   domain         413888 non-null  object
 6   language       413888 non-null  object
 7   sourcecountry  413888 non-null  object
dtypes: object(8)
memory usage: 25.3+ MB


In [7]:
# (rows, columns) of everything collected, all languages included.
articles_df.shape

(413888, 8)

In [9]:
# (rows, columns) of the subset whose `language` is "English".
articles_df.loc[articles_df["language"].eq("English")].shape

(155977, 8)

In [21]:
# The 30 domains with the most English-language articles.
articles_df.loc[articles_df["language"] == "English", "domain"].value_counts().head(30)

msn.com                      5720
interfax.com                 3600
sputniknews.com              1295
news.yahoo.com               1159
reuters.com                  1111
sott.net                     1097
washingtonpost.com            969
yahoo.com                     966
newsweek.com                  963
zerohedge.com                 883
nationalinterest.org          881
menafn.com                    855
rferl.org                     854
stripes.com                   796
dailymail.co.uk               781
globalsecurity.org            761
toinformistoinfluence.com     663
unian.info                    663
russiaherald.com              638
wsws.org                      638
kyivpost.com                  617
europesun.com                 607
abcnews.go.com                600
thestar.com.my                591
dw.com                        583
apnews.com                    570
uk.reuters.com                555
nytimes.com                   539
washingtontimes.com           535
112.internatio

In [23]:
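# Disabled scratch check: compare the host parsed from each English article's URL with its `domain` column.
#articles_df[articles_df["language"] == "English"]["url"].apply(lambda x: x.split("://")[1].split("/")[0])# == articles_df["domain"]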

In [32]:
# Save the collected articles. The target folder is created first because
# to_csv does not create missing directories and would fail on a fresh checkout.
output_path = Path("data/articles_v1.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
articles_df.to_csv(output_path, index=False, encoding="utf-8")