# Medium scraper

This code now lives in scrape_medium.py.
Inspired by https://github.com/lazuxd/medium-scraping/blob/master/medium_scraping.ipynb 

## Get URLs of articles
First search the tag archive by month; if a month page lists 10 or more articles, the listing may be incomplete, so fall back to searching that month day by day.

In [6]:
import requests
from bs4 import BeautifulSoup
import pdb
from calendar import Calendar
from datetime import date, timedelta

# Collect article URLs for one Medium tag by walking its archive pages:
# month pages first, then per-day pages for months that look truncated.
tag = 'white-supremacy'
url_base = 'https://medium.com/tag/{}/archive/{}/{:02d}'
article_urls = []
c = Calendar()

# Years to crawl (end is exclusive). Widen the range to go further back.
FIRST_YEAR = 2015
LAST_YEAR_EXCLUSIVE = 2022

# A month page showing this many previews may be hiding more behind
# per-day archive pages.
MONTH_PAGE_LIMIT = 10

# Exact class string used to select article preview cards on archive pages.
ARTICLE_CLASS = "postArticle postArticle--short js-postArticle js-trackPostPresentation js-trackPostScrolls"


def _fetch_archive_soup(url):
    """Return the parsed archive page at *url*, or None if it is not a 200.

    Redirects are not followed, so a redirecting archive URL also yields
    None (presumably Medium redirects empty archive pages -- verify).
    """
    response = requests.get(url, allow_redirects=False)
    if response.status_code != 200:
        return None
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    return BeautifulSoup(response.content, 'html.parser')


def _find_article_cards(soup):
    """Return every article preview card found in a parsed archive page."""
    return soup.find_all("div", class_=ARTICLE_CLASS)


def _extract_article_urls(cards):
    """Return the article URL (query string removed) for each preview card.

    Cards without a usable link are reported and skipped instead of
    aborting the whole crawl.
    """
    urls = []
    for card in cards:
        links = card.find_all("a")
        # The crawl takes the 4th anchor of a card as the article link; this
        # is layout-dependent -- re-check if Medium's markup changes.
        href = links[3].get('href') if len(links) > 3 else None
        if not href:
            print('Skipping an article preview without a usable link')
            continue
        urls.append(href.split('?')[0])
    return urls


def _needs_day_search(soup, cards):
    """Return True when a month page should be re-crawled day by day."""
    if len(cards) < MONTH_PAGE_LIMIT:  # Fewer than 10 means don't have to search by day
        return False
    calendar_mention = soup.find('p', class_='u-marginBottom40')
    # A missing or empty paragraph is treated as "exactly 10 articles":
    # the month page is taken as complete.
    return calendar_mention is not None and calendar_mention.text != ''


for year in range(FIRST_YEAR, LAST_YEAR_EXCLUSIVE):
    for month in range(1, 13):
        month_url = url_base.format(tag, year, month)

        soup = _fetch_archive_soup(month_url)
        if soup is None:
            print(f"No articles found for {month_url}")
            continue

        articles = _find_article_cards(soup)
        if not _needs_day_search(soup, articles):
            print(f'{len(articles)} articles found at {month_url}')
            article_urls += _extract_article_urls(articles)
            continue

        # Try calendar date URLs. itermonthdates() pads with days from the
        # neighbouring months, so keep only days inside this month. The loop
        # variable is `day` so it does not shadow the imported `date` class.
        for day in (d for d in c.itermonthdates(year, month) if d.month == month):
            day_url = month_url + '/{:02d}'.format(day.day)
            soup = _fetch_archive_soup(day_url)
            if soup is None:
                print(f"No articles found for {day_url}")
                continue
            articles = _find_article_cards(soup)
            print(f'{len(articles)} articles found at {day_url}')
            article_urls += _extract_article_urls(articles)

No articles found for https://medium.com/tag/white-supremacy/archive/2015/01
1 articles found at https://medium.com/tag/white-supremacy/archive/2015/02
No articles found for https://medium.com/tag/white-supremacy/archive/2015/03
No articles found for https://medium.com/tag/white-supremacy/archive/2015/04
1 articles found at https://medium.com/tag/white-supremacy/archive/2015/05
2 articles found at https://medium.com/tag/white-supremacy/archive/2015/06
5 articles found at https://medium.com/tag/white-supremacy/archive/2015/07
2 articles found at https://medium.com/tag/white-supremacy/archive/2015/08
2 articles found at https://medium.com/tag/white-supremacy/archive/2015/09
10 articles found at https://medium.com/tag/white-supremacy/archive/2015/10
10 articles found at https://medium.com/tag/white-supremacy/archive/2015/11
No articles found for https://medium.com/tag/white-supremacy/archive/2015/11/01
No articles found for https://medium.com/tag/white-supremacy/archive/2015/11/02
No arti

KeyboardInterrupt: 

## Get article text

In [7]:
# Number of article URLs collected by the archive crawl above.
len(article_urls)

533

In [18]:
import time
import pandas as pd
from tqdm.notebook import tqdm

# Download every collected article and keep its title and body text.
# Each entry of `outlines` is a dict with keys 'url', 'title' and 'text'.
outlines = []

# Status codes treated as transient server-side failures worth retrying.
RETRY_STATUS_CODES = (520, 524)
MAX_RETRIES = 3
REQUEST_TIMEOUT_SECONDS = 30

for url in tqdm(article_urls):
    # Deliberate per-URL boundary: one failing article (network error,
    # timeout, unexpected markup) is reported and must not stop the crawl.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

        # Actually retry after waiting; previously the loop slept and then
        # moved on to the next URL, silently dropping this article.
        retries = 0
        while response.status_code in RETRY_STATUS_CODES and retries < MAX_RETRIES:
            sleep_time = 30
            tqdm.write(f'{response.status_code} error, waiting {sleep_time} seconds and then trying again')
            time.sleep(sleep_time)
            response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
            retries += 1

        if response.status_code != 200:
            # Report and skip rather than dropping into the debugger, which
            # would stall an unattended run on the first bad URL.
            tqdm.write(f'Skipping {url}: HTTP status {response.status_code}')
            continue

        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(response.content, 'html.parser')
        title_search = soup.find('h1', class_='pw-post-title')
        title = title_search.text if title_search is not None else ''
        paras = [p.text for p in soup.find_all('p', class_='pw-post-body-paragraph')]
        # The title is repeated as the first line of 'text'; strip() removes
        # the leading newline left behind when the title is empty.
        outlines.append({'url': url, 'title': title, 'text': '\n'.join([title] + paras).strip()})
        time.sleep(0.5)  # brief pause between successful requests
    except Exception as e:
        tqdm.write(f'{str(e)}\n\t for {url}')

In [19]:
# Build a table with one row per scraped article (columns: url, title, text)
# from the dicts in `outlines`, then display it.
articles = pd.DataFrame(outlines)
articles

Unnamed: 0,url,title,text
0,https://medium.com/@hdyerjr/florida-sheriff-te...,Florida Sheriff Tells Drivers to Run over Stre...,Florida Sheriff Tells Drivers to Run over Stre...
1,https://medium.com/@absurdistwords/brunch-an-a...,Brunch: An Allegory of White Supremacy,Brunch: An Allegory of White Supremacy\nFADE I...
2,https://medium.com/news-and-politics/to-my-fri...,"To My Friends Who Have Not Posted, Acknowledge...","To My Friends Who Have Not Posted, Acknowledge..."
3,https://medium.com/@enajlaura/thoughts-on-the-...,Thoughts on the Charleston Shooting (Or: White...,Thoughts on the Charleston Shooting (Or: White...
4,https://medium.com/@Catharticme/teaching-lesso...,Teaching Lessons of White Supremacy in Prime-T...,Teaching Lessons of White Supremacy in Prime-T...
...,...,...,...
134,https://bullshit.ist/running-on-high-velocity-...,Running on “high-velocity bitterness” on my “o...,Running on “high-velocity bitterness” on my “o...
135,https://medium.com/@abelateiner/the-gift-of-wh...,The Gift of White Burnout,The Gift of White Burnout\nBurnout saved my li...
136,https://medium.com/@YcfMan/here-is-my-solution...,,Here is my solution to staying alive: get a re...
137,https://medium.com/@joselromero/the-one-absolu...,The One Absolute Reason Why Racism Is Bullshit!,The One Absolute Reason Why Racism Is Bullshit...


In [20]:
# Turn off column-width truncation so the full article text is displayed.
pd.set_option('display.max_colwidth', None)
# Show the first five rows.
articles.head()

Unnamed: 0,url,title,text
0,https://medium.com/@hdyerjr/florida-sheriff-tells-drivers-to-run-over-street-protesters-bebdddde9d82,Florida Sheriff Tells Drivers to Run over Street Protesters,"Florida Sheriff Tells Drivers to Run over Street Protesters\nAs reported by Counter Currentnews.com, the sheriff of Florida’s Palm Beach County is being called on the carpet for making incendiary remarks at a community meeting last month in Boynton Beach. It seems Sheriff Ric Bradshaw urged the attendees that they should use their vehicles as weapons against protesters — “violent thugs” in his parlance — who may be blocking their path."
1,https://medium.com/@absurdistwords/brunch-an-allegory-of-white-supremacy-1e5b9344dd5e,Brunch: An Allegory of White Supremacy,"Brunch: An Allegory of White Supremacy\nFADE IN\nINT CLASSY BRUNCH SPOT IN NYC FINANCIAL DISTRICT. A COMPANY CEO AND A VICE PRESIDENT ARE SEATED AT A TABLE BY THE WINDOW. A HOMELESS WOMAN BEGINS TO POUND ON THE GLASS AND SCREAM OBSCENITIES.\nCEO: <SHOOS WOMAN THROUGH GLASS> Get out of here!\nWOMAN: <YELLS SOMETHING INAUDIBLE THROUGH THE GLASS>\nVP:< LAUGHING> What the hell is THAT about?\nCEO: Nothing. This crazy lady yells at me every time I eat here. She’s really nuts. I usually have to call the cops on her.\nVP: <LAUGHING> Jesus! Do you know her? Why does she pick on you?\nCEO: Know her? No. Not really…\nVP: Not really?\nCEO: Well. I mean she grew up in my house, but that was a long time ago.\nVP: Wait. WHAT? She used to LIVE with you?\n<WOMAN BANGS ON WINDOW>\nCEO: SHUT UP CRAZY! Not me really. I mean it was my father’s house.\nVP: Huh? I’m confused.\nCEO: Well. My father was a great man. But in his youth he had some troublesome moments\nVP: I don’t get it.\nCEO: Ok. So funny story. When my dad first came to NYC, He was broke. He lived off of the charity of others.\nVP: YOUR father was poor? I don’t believe it,\nCEO: Yes indeed. A true rags to riches story. See. One night he was sleeping on a park bench, and a local man offered to let him sleep at his place.\nVP: Ok…\nCEO: My father jumped at the chance. They fed him and gave him a warm bed. They let him stay two weeks rent free. He fell in LOVE with that house.\nVP: What does this have to do with her?\n<POINTS AT SCREAMING WOMAN>\nCEO: Patience. Let me finish. So like I said. My dad loved the house so much that he slaughtered the man and his wife in their sleep, buried them under the dirt floor in the basement and kept living there. He forged a few signatures on a few things and made it look like they’d moved to Europe.\nVP: <STUNNED> You’re… kidding. Right?\nCEO: Nope. 
It was a little underhanded, I admit. Anyway. The place was WAY too big for him to keep up himself, so he needed help.\nVP: UH HUH…\nCEO: Anyway. So he scouted the city till he found the perfect target.\nVP: Target?\nCEO: To be the maid. You know? So he’s at the park one evening when he sees this beautiful young woman with her husband and newborn baby sitting by the lake. And he figures “Perfect”. So. My dad.. Shrewd guy he is… Slices the man’s throat, puts his wife into the back of his trunk and takes her back to his place.\nVP: <AGHAST> That’s… that’s…\nCEO: A little tacky, I know. But it was a different time. Anyway. So yeah, he brings the woman back home. And she, you know. Keeps trying to escape, because you know how women are, am I right? <UNRECIPROCATED HIGH FIVE>. So of course he’s got to break her, right? So as he told it, it’s like WEEKS of rapes and beatings. Just to show her who’s boss. Luckily for me, he knocked her up. Nine months later, there’s ME!\nVP: What about the other baby?\nCEO: Oh. He didn’t really care about her at all. She wasn’t his kid. But I was. Anyway. So eventually he flips the house, and uses the money to buy a few more properties and eventually had enough money to start this company.\nVP: I had no idea that’s how this company got started.\nCEO: Yeah. Totally. So eventually. He and the maid —\nVP: The maid. You mean your MOTHER?\nCEO: Whatever. He and the maid got older. She never left him. All that torture was really effective. But she died.. then he died and it was just me and the maid’s daughter. I never had anything against her really. We were close as kids. But I didn’t want to enable her. So i kicked her out.\nVP: You mean… your sister?\nCEO: Whatever. So when he died, he left everything to me. The company, the house, everything. The maid’s daughter was really mad about that… I don’t know why… she never did anything useful. she didn’t deserve it. She tried to sue me repeatedly for part of the inheritance. 
Said something about her having claim to it too. Spent her life savings suing me. I knew the judge though and he assured me that he’d have my back. So the suit failed. I savaged her in the press. Nobody would hire her. Sad really, She never really made anything out of herself.\nVP: Wait… That woman isn’t…\nCEO: Hold on one second <PULLS OUT PHONE. DIALS 911> Yeah. Hi. Yes yes. Its me again. I KNOW. Like clockwork right? Could you come get her? <HANGS UP PHONE> Sorry. I like to have her cleared out before I leave. One time she hit me with a rock. It hurt. Left a scrape and everything.\nVP: I’m sorry. Did you just call 911 on your own sister?\nCEO: MAID’S KID. And yes. She’s a violent loser and needs to take responsibility for her life. Look at me. I’m a successful CEO and I never got a damn bit of help.\nVP: But your father left you all his blood money.\nCEO: AHEM. Don’t forget who you’re talking to. Both my father and I made our way without help. Without us you wouldn’t have a job. Listen. I’m not a monster. I’ve given her change, on really cold nights, I offer her $5 for a shelter. <WOMAN SCREAMS AS IS BEATEN AND DRAGGED AWAY BY COPS> Damned low-lives.\nVP: I… um. Quit. CHECK PLEASE.\nFADE OUT"
2,https://medium.com/news-and-politics/to-my-friends-who-have-not-posted-acknowledged-shared-or-commented-about-the-tragedy-in-a00798464010,"To My Friends Who Have Not Posted, Acknowledged, Shared or Commented About the Tragedy in Charleston","To My Friends Who Have Not Posted, Acknowledged, Shared or Commented About the Tragedy in Charleston\n[Edit: June 20th — I wrote this very soon after the shootings occured. I was in a raw state of despair. Since then, I have realized that these words, although passionately felt, have blamed and made assumptions about others. In retrospect, I think that what I was feeling was alone in my grief and outrage and despair. I wanted to feel less alone. I wanted to know who my people were. I wanted to know who was feeling what I felt. Rather than just delete this posting, I’m going to leave it stand, but with the acknowledgment that one can never know what others are truly thinking, feeling or doing, nor do I have the right to dictate what others post on their social media. I can only hope that others who share my feelings will reach out, somehow.]\nIt’s been twenty-four hours now since it happened. Since a white supremacist terrorist entered African Wesleyan Methodist Episcopal Church in Charleston, South Carolina, and shot to death 9 people in a prayer meeting.\nI was driving home last night after I had worked, then gone for a run along the shore. I was sweaty and hungry. I stopped to get a sandwich because I didn’t want to wait to eat until I got home. It was just starting to get dark. I ate my sandwich in my car and right before starting the ignition, I checked the Facebook feed on my phone. One of my friends linked to the first news report with two words: “Oh no.” I read the news and doubled over.\nWhat were you doing when you first heard or read this news? Were you putting your children to bed? Just drinking your morning coffee? What was your reaction? Maybe it was horror. Disbelief. Grief. Despair. Sorrow. 
Or maybe it was, “Oh, well. That’s sad. But that doesn’t really have anything to do with me.” And you carried on with your evening, or your day, without skipping much of a beat.\nIf you felt horror/disbelief/grief/despair/sorrow, did you share those feelings? If you were rendered wordless, did you share some words that resonated with you? It’s been almost a full day now. It’s taken some people in my social media feed a few hours to gather themselves, and many are beginning to respond, by recognizing and honoring the names and lives of those who were killed, by showing ways we can directly support the members of AME church, either financially or with a card. But it seems as if others haven’t gathered at all.\nAnd I can’t help but wonder. What is going on with the people who are seemingly merrily carrying on with their weeks, looking forward to fun weekend plans, posting about their workouts or meals or dogs or babies and vacations? I’m not suggesting that we all curl into a fetal position and withdraw from life. (although we might feel like it) But a moment, a few hours, or a day of silence or reflection??\nI try to imagine why some people aren’t mentioning this terrible, tragic act. Are these people who would (and did) have lots to say about 9/11, or the Boston Marathon bombing, because they felt personally affected? Because 9/11 was about “all Americans” and the Boston Marathon bombing was about “runners?” And Sandy Hook, of course, because children. But these were black people. Praying. In a church.\nIs it that you don’t go to church? You don’t pray? You’re not black? You don’t live in South Carolina? This doesn’t have anything to do with you? WHAT?\nPerhaps you have a professional life to consider. Maybe you’re a blogger or an author, a business owner or a lawyer or something, and you think, I don’t post about politics. But this isn’t about politics. This is about human beings.\nI’m going to say it. I’m going to say what I’ve been thinking all day. 
I’ve been virtually holding hands and weeping with those friends who have chosen to pay attention, and to comment on, what happened in Charleston last night. Who have been visibly shaken, shattered, and completely taken down by this. The only reason I can tell they are visibly shaken, is that they’ve shown it, if just to post one word in a comment or on their wall. “No.”\nThe rest of you? I have to say. It pretty much looks like you don’t give a shit. I don’t know what your reasons are. Maybe they’re good reasons. Like maybe you sustained a terrible accident and your fingers were broken. Maybe there’s a white supremacist holding a gun to your head, and they’re forcing you to post all that superficial shit. Maybe you are a white supremacist yourself, and I just don’t know you well enough to realize it.\nBut think about it. Think about if this was your place of worship. Your school, or your child’s school. Your beloved community. Your people. Whoever “your people” are. Think about if someone you loved, or felt an affinity with, was gunned down while doing the most innocent, peaceful thing in the world. Think about how you would feel if they were massacred. And then think about how you would feel if you scanned your social media and witnessed your friends not mentioning it at all.\nI just don’t understand it. A few things come to mind. There’s this:\nAnd there’s this.\nand this.\nI’m not every single person I know to spend hours writing in-depth analysis or op-eds about racism, terrorism, gun control. Unless they are moved to do so. But: just a small gesture of acknowledgement. Just one word or image. Solidarity works on Instagram, too. Just any recognition of the horror. Or maybe, a recognition of the humanity of the lives that were taken away, like librarian Cynthia Hurd who was “dedicated to books and people.” Share it! It won’t hurt. Just any acknowledgement that you are not going about your day, business as usual. 
That you, as a human, were moved by the suffering of other humans. Please.\nIt’s been interesting and heartbreaking to watch social media today. And to recognize that so many of my thousand-plus “friends” truly are: only acquaintances."
3,https://medium.com/@enajlaura/thoughts-on-the-charleston-shooting-or-white-supremacy-continues-to-be-awful-pervasive-8ffba481a8f2,"Thoughts on the Charleston Shooting (Or: White Supremacy Continues to Be Awful, Pervasive)","Thoughts on the Charleston Shooting (Or: White Supremacy Continues to Be Awful, Pervasive)\nI’ve spent the past few days thinking a lot about the shooting in Charleston. While as a white person I think there are more important voices than mine to be heard on this subject, and I will link to some of them, I also decided that one more voice calling out structural racism is never a bad thing.\nOne of the first things I noticed when reading the media coverage of this shooting was that before the shooter was even definitely identified- when he was just a white male suspect- commentators were already wondering if he was mentally ill. Contrast this with the way non-white victims of white violence are described as thugs, criminals, or — my favorite — simply as “no angels.”\nThis hypocrisy shows that our society continues to have very clearly defined narratives that determine who is a victim and who is not, and these roles are assigned based on race.\nBut there are several other reasons this reference to the shooter’s alleged mental illness is a problem.\nOne is that overwhelmingly, people with mental illnesses are much more likely to be victims of abuse and violence than its perpetrators. Even in cases where they do act out violently, they are much more likely to engage in self-harm than to attack others.\nThat is not to necessarily say that the Charleston shooter did not have a mental illness. I’m not a medical professional, nor have I ever met this man, so obviously I cannot diagnose him or prove that he did not have any mental health issues. 
It is to say that even if he was mentally ill, that is not what caused him to carry out this attack.\nIn fact, the shooter has told us repeatedly exactly what did motivate him to commit mass murder against a peaceful group of black people who welcomed him to pray with them. He said it in a statement to police and apparently in a manifesto written prior to the attack.\nAttributing his mass murder to mental illness does three things: 1. It stigmatizes and scapegoats the mentally ill; 2. It erases his true intentions and absolves him of personal responsibility for his crime; and 3. It reduces racism to an aberrant quality present in a few isolated individuals, rather than recognizing its ingrained status in the structure of American society and institutions.\nThis shooter grew up in a culture that allowed and encouraged him to revere the Confederacy, apartheid South Africa, and white-ruled Rhodesia. And we continue to see less obvious examples of that racist culture in the way he is treated.\nBesides the rush to excuse his behavior by blaming it on mental illness, we have the simple fact that a white man who was known to be armed and willing to kill — who literally just committed mass murder- was taken alive. Not only that, but when he was arrested, police put him in a bulletproof vest. In other words, when they captured a man who had just murdered 9 black victims in cold blood, the officers’ first instinct was to protect him (against a violent backlash they assumed would come, but has failed to materialize.) Meanwhile, in the cases of Mike Brown, Eric Garner, Tamir Rice, and countless others, officers’ first instinct was to use deadly force.\nThis shooting, and its aftermath, also reminded me of another shooting, at UC Santa Barbara. In that case, as in this one, the shooter was a white male who felt threatened and offended by a changing society that he felt was beginning to value “others” over himself. 
We once again had a manifesto expressing his rage and entitlement, and clearly defining his target population. In this case, it was women, who the shooter felt did not grant him the respect and sex he felt he deserved. Despite the fact that he literally wrote his motive down for us, we were still treated to discourses on how his mental health must have steered his actions, rather than blaming his misogyny, and a society that encouraged him to believe that he was entitled to women’s bodies.\nWhile racism and misogyny are not the same, they often intersect in critical ways. One of the things the Charleston shooter used to justify his actions was a preoccupation with white women’s “purity,” and a desire to eliminate the threat he felt black men posed to it. And the best evidence of the persistence of white supremacy and white male privilege is the fact that even when they commit mass atrocities, people still look for ways to excuse white men.\nRecommended Reading:\nCharleston Church Massacre: The Violence White America Must Answer For\nWhite Fragility, Silence, and Supremacy: Why All White Hands Are Bloody\nThugs and Terrorists Have Attacked Black Churches for Generations\nThe Charleston Shooting is Part of an Unspoken History of Terror in Black Churches\nCharleston Syllabus\nOriginally published at intlaffair.wordpress.com on June 21, 2015."
4,https://medium.com/@Catharticme/teaching-lessons-of-white-supremacy-in-prime-time-blackrifice-in-the-post-apocalyptic-world-of-the-53213182050f,Teaching Lessons of White Supremacy in Prime-Time: Blackrifice in the Post-Apocalyptic World of the CW’s The 100,"Teaching Lessons of White Supremacy in Prime-Time: Blackrifice in the Post-Apocalyptic World of the CW’s The 100\nThe main problem with the CW’s The 100 is a reliance on blackrifice to advance the motivations of the white characters in the narrative.\nSo, in The 100, the Earth has been devastated by nuclear war. 12 space stations that were orbiting the planet came together, Voltron like, into one huge station in the aftermath.\nLife on the huge station is harsh, with all crimes being punishable by death- unless you’re under 18, in which case you’re imprisoned for an indeterminate amount of time.\nAfter 97 years, the people in charge in the station decide to try to test the survivability of the landscape on the planet post-nuclear war and send down 100 (see?) of the young prisoners to act as canaries.\nEarly on in the first episode, we’re introduced to the main characters. They include:\nClarke, the main character, a blond haired, blue eyed young woman.\nBellamy, the bad boy, a dark and handsome type.\nFinn, the archetype of every college sophomore white guy who wants to change the world (seriously, he’s insufferable).\nand\nWells, the son of the Chancellor, the ruler of the remnants of humanity. Also the only black character of the 100 with more than a line for approximately eight episodes.\nOf the titular 100, two facts stand out. They’re overwhelmingly white. And they’re overwhelmingly good looking.\nWells, the aforementioned Only Black Kid With A Line, is quietly in love with Clarke. But her heart, of course, belongs with Finn. 
The idea that the CW would pair the white, blond, female main character off with the dark African American character seems like it was always a non-starter for the show.\nAnd so it is, for the first three episodes, that Clarke and Finn run around the landscape, have sex, have adventures, etc, while Wells stays behind and off camera for the most part. This ends abruptly as Wells is killed off at the end of the third episode.\nWells’ death is used, of course, as a means for the white characters to ruminate on their society and capital punishment. That’s pretty much it. Wells is blackrificed to further the storyline of his white compatriots.\nBut wait! There’s more.\nNow, one of the other two black characters is Wells’ father, Chancellor Jaha. Let’s take a moment to understand that in the world of this show, the only two black characters of note from space are related.\nJaha is up on the station, relegating him to a supporting character (the appeal of the show for the CW’s demographic has got to be the promise of a world without parents). He is the leader of his people and does leader things, like giving cliched inspirational speeches and getting exasperated with his subordinates.\nEventually, in order to make the first season’s penultimate episode have some dramatic heft, the station becomes unlivable due to the machinations of Ellen Tigh and its inhabitants are forced to flee. In order to do so, the stations have to decouple from the main hub.\nOf course this all goes very wrong and someone has to stay behind to hit the release button and then die, slowly.\nSo who makes this sacrifice? 
One of the 1000s of people left on the station?\nNope.\nChancellor Jaha.\nYes, he blackrifices himself so that the remaining people on the station can live.\nI don’t know if the show was going for some asinine captain goes down with the ship shit here, but man…\nAll the other characters of color are either comic relief for the kids or the mysterious, tribal “Grounders,” a tough and savage race who managed to survive the nuclear holocaust. They, unlike the “good guys,” or the kids from the ship and the people on the ship, are not majority white.\nOne of these supporting characters, called Lincoln (of course), acts to move the story along by acting as the savior of supporting character Octavia. Presented initially as a wild, dark man, he eventually shows that he knows English and acts the part of noble savage.\nLincoln continues to throw himself in harms way to protect the white adventuring kids. That’s pretty much his role. Attempted blackrifice.\nOh yeah, then he introduces the Clarke to the leader of his people. Who is also a blond haired white woman.\nThe 100 is a well done show, for what it is. Don’t get me wrong. But the continual misfires, intentional or not, are creating a show that seems more interested in maintaining an old culture while telling the story of a new one.\nIt’s no surprise, really, that people of color feel that they have to start movements to assert the value of their lives. When our culture teaches teens subtle lessons in white supremacy like The 100 does, the value of black life is eroded by the time they reach adulthood."
