In [1]:
!pip install mwclient




In [1]:
import mwclient #media wiki client
import time

site = mwclient.Site("en.wikipedia.org")
page = site.pages["Bitcoin"]
revs = list(page.revisions())

revs = sorted(revs, key=lambda revs: revs["timestamp"])

from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis")

def find_sentiment(text):
    sent = sentiment_pipeline([text[:250]])[0]
    score = sent["score"]
    if sent["label"]=="NEGATIVE":
        score *=-1
    return score

edits = {}

for rev in revs:        
    date = time.strftime("%Y-%m-%d", rev["timestamp"])
    if date not in edits:
        edits[date] = dict(sentiments=list(), edit_count=0)
    
    edits[date]["edit_count"] += 1
    
    comment = rev.get("comment", "")
    edits[date]["sentiments"].append(find_sentiment(comment))
from statistics import mean

for key in edits:
    if len(edits[key]["sentiments"]) > 0:
        edits[key]["sentiment"] = mean(edits[key]["sentiments"])
        edits[key]["neg_sentiment"] = len([s for s in edits[key]["sentiments"] if s < 0]) / len(edits[key]["sentiments"])
    else:
        edits[key]["sentiment"] = 0
        edits[key]["neg_sentiment"] = 0
    
    del edits[key]["sentiments"]


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [2]:
edits

{'2009-03-08': {'edit_count': 4,
  'sentiment': -0.5505250096321106,
  'neg_sentiment': 0.75},
 '2009-08-05': {'edit_count': 1,
  'sentiment': 0.7481209635734558,
  'neg_sentiment': 0.0},
 '2009-08-06': {'edit_count': 2,
  'sentiment': 0.9957457184791565,
  'neg_sentiment': 0.0},
 '2009-08-14': {'edit_count': 1,
  'sentiment': 0.930020809173584,
  'neg_sentiment': 0.0},
 '2009-10-13': {'edit_count': 2,
  'sentiment': -0.22750043869018555,
  'neg_sentiment': 0.5},
 '2009-11-18': {'edit_count': 1,
  'sentiment': 0.8839507699012756,
  'neg_sentiment': 0.0},
 '2009-12-08': {'edit_count': 1,
  'sentiment': -0.9869275689125061,
  'neg_sentiment': 1.0},
 '2009-12-17': {'edit_count': 1,
  'sentiment': -0.9975171089172363,
  'neg_sentiment': 1.0},
 '2010-02-23': {'edit_count': 1,
  'sentiment': -0.9994946718215942,
  'neg_sentiment': 1.0},
 '2010-03-18': {'edit_count': 1,
  'sentiment': 0.8758774995803833,
  'neg_sentiment': 0.0},
 '2010-04-13': {'edit_count': 4,
  'sentiment': 0.84435534477233

In [5]:
!pip install pandas





In [3]:
import pandas as pd
edits_df = pd.DataFrame.from_dict(edits, orient="index")

In [4]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-08,4,-0.550525,0.75
2009-08-05,1,0.748121,0.00
2009-08-06,2,0.995746,0.00
2009-08-14,1,0.930021,0.00
2009-10-13,2,-0.227500,0.50
...,...,...,...
2023-03-03,1,-0.994531,1.00
2023-03-04,4,-0.998548,1.00
2023-03-06,1,0.972153,0.00
2023-03-09,1,-0.981754,1.00


In [5]:
edits_df.index = pd.to_datetime(edits_df.index)

In [6]:
from datetime import datetime
dates = pd.date_range(start = "2009-03-08", end = datetime.today())

In [7]:
edits_df = edits_df.reindex(dates, fill_value=0)

In [8]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-08,4,-0.550525,0.75
2009-03-09,0,0.000000,0.00
2009-03-10,0,0.000000,0.00
2009-03-11,0,0.000000,0.00
2009-03-12,0,0.000000,0.00
...,...,...,...
2023-03-06,1,0.972153,0.00
2023-03-07,0,0.000000,0.00
2023-03-08,0,0.000000,0.00
2023-03-09,1,-0.981754,1.00


In [9]:
rolling_edits = edits_df.rolling(15).mean()

In [10]:
rolling_edits

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-08,,,
2009-03-09,,,
2009-03-10,,,
2009-03-11,,,
2009-03-12,,,
...,...,...,...
2023-03-06,1.200000,-0.349851,0.500000
2023-03-07,1.200000,-0.349851,0.500000
2023-03-08,1.066667,-0.349838,0.466667
2023-03-09,1.066667,-0.348819,0.466667


In [11]:
rolling_edits.dropna()

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-22,0.266667,-0.036702,0.050000
2009-03-23,0.000000,0.000000,0.000000
2009-03-24,0.000000,0.000000,0.000000
2009-03-25,0.000000,0.000000,0.000000
2009-03-26,0.000000,0.000000,0.000000
...,...,...,...
2023-03-06,1.200000,-0.349851,0.500000
2023-03-07,1.200000,-0.349851,0.500000
2023-03-08,1.066667,-0.349838,0.466667
2023-03-09,1.066667,-0.348819,0.466667


In [12]:
rolling_edits.to_csv("wikipedia_edits.csv")