In [10]:
import polars as pl
import matplotlib.pyplot as plt
import zipfile
from datetime import datetime

# Volume of SoR logged onto the TDB over a given time period

In [None]:
def volumeCreatedAt(platform: str, listTimePeriod: list[tuple[str, str, str, int, int]],
                    base_dir: str = "/Volumes/Paresseux") -> int:
    """Count the statements of reasons (SoR) logged on the Transparency Database (TDB) over a time period.

    For every day of every listed month, the daily archive
    ``{base_dir}/{platform}/sor-{platform.lower()}-{year}-{month}-{day}-full.zip`` is opened.
    Every member of that archive is itself opened as a zip, and every member of
    those inner zips is read as a CSV whose rows are counted.

    Parameters:
        platform: name of the platform; used as the folder name and, lower-cased, in the file name.
        listTimePeriod: list of months as ("name", "year", "nbmonth", firstDayMonth, lastDayMonth),
            example: [("may", "2025", "05", 1, 31)]. Both days are inclusive.
        base_dir: folder containing one sub-folder of TDB dumps per platform.
            TO ADAPT: point this to where your TDB files are.

    Returns:
        The total number of CSV rows over the whole period.

    Raises:
        FileNotFoundError: if the archive for one of the requested days does not exist.

    Side effect: prints the running total before each month is processed.
    """

    def volumeDay(platform, year: str, month: str, day: int) -> int:
        day = str(day).zfill(2)     # day(int) -> two-character string (01 instead of 1)
        filename = f"{base_dir}/{platform}/sor-{platform.lower()}-{year}-{month}-{day}-full.zip"
        nbrows = 0
        # `with` closes every archive and member stream as soon as it has been counted,
        # so long periods do not accumulate open file handles.
        with zipfile.ZipFile(filename) as mother:
            for childname in mother.namelist():
                with mother.open(childname) as childfile, zipfile.ZipFile(childfile) as child:
                    for csvfilename in child.namelist():
                        with child.open(csvfilename) as csvfile:
                            nbrows += pl.read_csv(csvfile).shape[0]
        return nbrows

    sumtotal = 0
    for name, year, nbmonth, firstday, lastday in listTimePeriod:
        print(f"cumulative sum beginning of {name} = {sumtotal}")
        sumtotal += sum(
            volumeDay(platform, year, nbmonth, day)
            for day in range(firstday, lastday + 1)
        )

    return sumtotal

In [6]:
# Each entry is (month name, year, two-digit month number, first day, last day).

# Threads: 5 September 2024 to 12 March 2025.
listThreads = [
    ("september", "2024", "09", 5, 30),
    ("october", "2024", "10", 1, 31),
    ("november", "2024", "11", 1, 30),
    ("december", "2024", "12", 1, 31),
    ("january", "2025", "01", 1, 31),
    ("february", "2025", "02", 1, 28),
    ("march", "2025", "03", 1, 12),
]

# WhatsApp channels: 17 September 2024 to 12 March 2025.
listWhatsApp = [
    ("september", "2024", "09", 17, 30),
    ("october", "2024", "10", 1, 31),
    ("november", "2024", "11", 1, 30),
    ("december", "2024", "12", 1, 31),
    ("january", "2025", "01", 1, 31),
    ("february", "2025", "02", 1, 28),
    ("march", "2025", "03", 1, 12),
]

# Facebook and Instagram: 1 April 2024 to 12 March 2025.
listFBIN = [
    ("april", "2024", "04", 1, 30),
    ("may", "2024", "05", 1, 31),
    ("june", "2024", "06", 1, 30),
    ("july", "2024", "07", 1, 31),
    ("august", "2024", "08", 1, 31),
    ("september", "2024", "09", 1, 30),
    ("october", "2024", "10", 1, 31),
    ("november", "2024", "11", 1, 30),
    ("december", "2024", "12", 1, 31),
    ("january", "2025", "01", 1, 31),
    ("february", "2025", "02", 1, 28),
    ("march", "2025", "03", 1, 12),
]

In [39]:
# Total rows in the Threads daily dumps listed in listThreads (5 Sep 2024 - 12 Mar 2025).
volumeCreatedAt("Threads", listThreads)

cumulative sum beginning of september=0
cumulative sum beginning of october=29551
cumulative sum beginning of november=85712
cumulative sum beginning of december=199448
cumulative sum beginning of january=303313
cumulative sum beginning of february=396132
cumulative sum beginning of march=477277


507617

In [8]:
# Total rows in the Whatsapp-channels daily dumps listed in listWhatsApp (17 Sep 2024 - 12 Mar 2025).
volumeCreatedAt("Whatsapp-channels", listWhatsApp)

cumulative sum beginning of september = 0
cumulative sum beginning of october = 2868
cumulative sum beginning of november = 9545
cumulative sum beginning of december = 17001
cumulative sum beginning of january = 23200
cumulative sum beginning of february = 28619
cumulative sum beginning of march = 35014


38100

In [41]:
# Total rows in the Facebook daily dumps listed in listFBIN (1 Apr 2024 - 12 Mar 2025).
volumeCreatedAt("Facebook", listFBIN)

cumulative sum beginning of april=0
cumulative sum beginning of may=32388084
cumulative sum beginning of june=66338045
cumulative sum beginning of july=99695080
cumulative sum beginning of august=139760622
cumulative sum beginning of september=179754834
cumulative sum beginning of october=218866761
cumulative sum beginning of november=260597812
cumulative sum beginning of december=334125404
cumulative sum beginning of january=422250604
cumulative sum beginning of february=491927834
cumulative sum beginning of march=542636204


556346995

In [42]:
# Total rows in the Instagram daily dumps listed in listFBIN (1 Apr 2024 - 12 Mar 2025).
volumeCreatedAt("Instagram", listFBIN)

cumulative sum beginning of april=0
cumulative sum beginning of may=2778216
cumulative sum beginning of june=5671237
cumulative sum beginning of july=8241875
cumulative sum beginning of august=11973829
cumulative sum beginning of september=16790703
cumulative sum beginning of october=21372505
cumulative sum beginning of november=30676899
cumulative sum beginning of december=38859448
cumulative sum beginning of january=50623549
cumulative sum beginning of february=62760007
cumulative sum beginning of march=74223768


78723336

In [9]:
# Grand total over the four platforms, using the counts returned by the cells above.
total = sum((
    78723336,   # Instagram
    556346995,  # Facebook
    38100,      # Whatsapp-channels
    507617,     # Threads
))
print(total)

635616048


# Volume of SoR over a given time period whose application_date falls between startdate and lastdate

In [None]:
#this function measures the volume of statements of reasons (SoR) over a given timeperiod 
#but ensures that their application_date stays within the first and last dates of the period
    #parameters: 1) the name of the platform
    #            2) a list of months [("name", "year", "nbmonth", firstDayMonth, lastDayMonth)], example: [("may", "2025", "05", 1, 31)]
    #            3-4) the first and last days of the interval as datetimes, example: datetime(2025, 5, 1), datetime(2025, 5, 31)
    #TO ADAPT: when using the function yourself, be sure to adapt the beginning of the filepath to where your TDB files are

def volumeApplicationDate(platform: str, listTimePeriod: list[tuple[str, str, str, int, int]], startdate: datetime, lastdate:datetime): 

    def volumeDay(platform, year: str, month: str, day: int):
        day = str(day).zfill(2)     #converts day(int) as day(string) with two numbers (01 instead of 1)
        filename = f"/Volumes/Paresseux/{platform}/sor-{platform.lower()}-{year}-{month}-{day}-full.zip"    #(TO ADAPT)
        mother = zipfile.ZipFile(filename)
        nbrows = [
            (pl.scan_csv(child.open(csvfilename))
             .filter((pl.col("application_date").str.to_datetime()>=startdate)
                     .and_(pl.col("application_date").str.to_datetime()<=lastdate))
             .select(pl.len()).collect().item())
            for childname in mother.namelist()
            for child in [zipfile.ZipFile(mother.open(childname))]
            for csvfilename in child.namelist()
            ]
        return sum(nbrows)
    
    sumtotal=0
    for month in listTimePeriod:
        print(f"cumulative sum beginning of {month[0]} = {sumtotal}")
        summonth=0
        for day in range(month[3], month[4]+1):
            summonth=summonth+volumeDay(platform, month[1], month[2], day)
        sumtotal=sumtotal+summonth

    return sumtotal

In [14]:
# Instagram: scans the listFBIN dumps (1 Apr 2024 - 12 Mar 2025) and keeps rows whose
# application_date is between the two datetimes, bounds included.
# NOTE(review): datetime(2025, 3, 6) is midnight, so rows timestamped later on 6 March are excluded - confirm intended.
volumeApplicationDate("Instagram", listFBIN, datetime(2024, 4, 1), datetime(2025, 3, 6))

cumulative sum beginning of april = 0
cumulative sum beginning of may = 2705064
cumulative sum beginning of june = 5598085
cumulative sum beginning of july = 8168723
cumulative sum beginning of august = 11900677
cumulative sum beginning of september = 16717551
cumulative sum beginning of october = 21299353
cumulative sum beginning of november = 30603747
cumulative sum beginning of december = 38786296
cumulative sum beginning of january = 50550397
cumulative sum beginning of february = 62686855
cumulative sum beginning of march = 74150616


76614154

In [15]:
# Facebook: scans the listFBIN dumps (1 Apr 2024 - 12 Mar 2025) and keeps rows whose
# application_date is between the two datetimes, bounds included.
# NOTE(review): datetime(2025, 3, 6) is midnight, so rows timestamped later on 6 March are excluded - confirm intended.
volumeApplicationDate("Facebook", listFBIN, datetime(2024, 4, 1), datetime(2025, 3, 6))

cumulative sum beginning of april = 0
cumulative sum beginning of may = 31762710
cumulative sum beginning of june = 65712671
cumulative sum beginning of july = 99069706
cumulative sum beginning of august = 139135248
cumulative sum beginning of september = 179129460
cumulative sum beginning of october = 218241387
cumulative sum beginning of november = 259972438
cumulative sum beginning of december = 333500030
cumulative sum beginning of january = 421625230
cumulative sum beginning of february = 491302460
cumulative sum beginning of march = 542010830


548040877

In [16]:
# Threads: scans the listThreads dumps (5 Sep 2024 - 12 Mar 2025) and keeps rows whose
# application_date is between the two datetimes, bounds included.
# NOTE(review): datetime(2025, 3, 6) is midnight, so rows timestamped later on 6 March are excluded - confirm intended.
volumeApplicationDate("Threads", listThreads, datetime(2024, 9, 5), datetime(2025, 3, 6))

cumulative sum beginning of september = 0
cumulative sum beginning of october = 28503
cumulative sum beginning of november = 84664
cumulative sum beginning of december = 198400
cumulative sum beginning of january = 302265
cumulative sum beginning of february = 395084
cumulative sum beginning of march = 476229


490955

In [17]:
# Whatsapp-channels: scans the listWhatsApp dumps (17 Sep 2024 - 12 Mar 2025) and keeps rows whose
# application_date is between the two datetimes, bounds included.
# NOTE(review): datetime(2025, 3, 6) is midnight, so rows timestamped later on 6 March are excluded - confirm intended.
volumeApplicationDate("Whatsapp-channels", listWhatsApp, datetime(2024, 9, 17), datetime(2025, 3, 6))

cumulative sum beginning of september = 0
cumulative sum beginning of october = 2488
cumulative sum beginning of november = 9165
cumulative sum beginning of december = 16621
cumulative sum beginning of january = 22820
cumulative sum beginning of february = 28239
cumulative sum beginning of march = 34634


36160

In [19]:
# Grand total over the four platforms, using the filtered counts returned by the cells above.
total = sum((
    76614154,   # Instagram
    548040877,  # Facebook
    490955,     # Threads
    36160,      # Whatsapp-channels
))
print(total)

625182146
