In [116]:
import nltk
import os
import re
import numpy as np
from lxml import etree
from lxml import html
import codecs



from __future__ import print_function
from nltk.stem import *
from nltk.stem.porter import *
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer

Lem = WordNetLemmatizer()
cwd = os.getcwd()

from pylab import *
from numpy.matlib import repmat
import sys
import matplotlib 
import matplotlib.pyplot as plt
from scipy.io import loadmat
import time
from helper import *
%matplotlib notebook

print('You\'re running python %s' % sys.version.split(' ')[0])


print('The nltk version is {}.'.format(nltk.__version__))
print('The numpy version is {}.'.format(np.__version__))

You're running python 2.7.16
The nltk version is 3.4.5.
The numpy version is 1.16.5.


In [135]:
def preprocess(text,stemLem='s'):
    tokenizer = nltk.RegexpTokenizer(r"\w+")
    raw_words = tokenizer.tokenize(text)
    stopword_list = stopwords.words('english')
    stemmer = SnowballStemmer("english")
    processed = []
    Lem = WordNetLemmatizer()
    
    for word in raw_words:        
        word_lower = word.lower()
        if word_lower in stopword_list:
            'do nothing'
        else:
            if stemLem == 'l':
                try:
                    lemword = Lem.lemmatize(word_lower)
                    processed.append(lemword)
                except UnicodeDecodeError:
                    'do nothing'
                
            if stemLem == 's':
                try:
                    processed.append(stemmer.stem(word_lower))
                except UnicodeDecodeError:
                    'do nothing'
                    
    return processed

def freqMap(processed_text,k=1):
    word_frequencies = {}
    for word in processed_text:
        if word in word_frequencies:
            word_frequencies[word] += 1
        else:
            word_frequencies[word] = 1
    for word, freq in word_frequencies.items():
        if freq <= k:
            word_frequencies.pop(word)
    return word_frequencies


def cleanhtml(raw_html):
    #remove what's between <date> and </date>
    date_cleaner = re.compile('<date>.*?</date>')
    cleanr = re.compile('<.*?>')
    
    cleantext = re.sub(date_cleaner, '', raw_html)
    cleantext = re.sub(cleanr, '', cleantext)
    return cleantext

def loadData(folderDir,k=1):
    '''
    input: 
        folderDir: directory of the data
        k: words mentioned this number of times or less will not included in the word frequency map
    outputs: 
        x: (n x d) posts in word frequency format
        y: (n) raw labels
        corpus: all the words used in dataset
    
    '''
    parser = etree.XMLParser(recover=True)
    
    
    
    corpus = set()
    #get the filenames in the directory
    labels = os.listdir(folderDir)
    labels = sorted(labels)
    
    n = len(labels)
    x = [None]*n
    y = [None]*n
    
    
    #go through each file and get the text
    
    #considering maybe using XML trees here in stead of doing it this way
    for i in range(n):
        with codecs.open(folderDir + labels[i], 'r', encoding='utf-8', errors='ignore') as f:
            raw_file_data_string = f.read()  
#             print(raw_file_data_string.split('<post>'))
        
        
        #take tags out of raw text:
        clean_text_data = cleanhtml(raw_file_data_string)
        
        #stem or lemmatize the words in string file
        processed_doc = preprocess(clean_text_data)
        
        #make a frequency map of the words in the string
        freqs = freqMap(processed_doc,k)
        
        
        x[i] = freqs
        y[i] = labels[i][:-4]
    
    #create the corpus
    for blogger in x:
        for key in blogger.keys():
            corpus.add(key)
        
    #ensure corpus has only unique elements and is sorted
    corpus = sorted(list(corpus))
    
    return x ,y ,corpus

def vectorizeX(x_raw,corpus):
    '''
    input:
        x_raw: the word_frequency maps used by the n bloggers
        corpus: the total words used by all the bloggers
    
    output:
        x: (n x d) xs in vector form where each xi has numbers from 0 and up in each dimension
    '''
    
    d = len(corpus)
    n = len(x_raw)
    x = np.zeros((n,d))
    
#     this needs to be sped up... I think I should use some sort of hashing or dictionary 
#     and only go through the words in each entry
# '''
#     this goes through only the words in each blogentry, so that the alg does not have to go through all n*d entries

#     #go through each blog entry
#     for bloggerentryindex in range(n):
#         blogentry = x_raw[bloggerentryindex]
        
#         #for each word in the blog entry look up it's index and add that to the appropriate spot in the x matrix
#         for word in blogentry:
#             wordIndex = getWordIndex(corpus)
#             x[bloggerentryindex][wordIndex] = x_raw[bloggerentryindex][word]
# '''
    for j in range(d):
        wordj = corpus[j]
        for i in range(n):
            if wordj in x_raw[i]:
                x[i][j] = x_raw[i][wordj]
        

    return x

def vectorizeY(y_raw,label_type='age'):
    
    n = len(y_raw)
    temp = [None]*n
    y_out = np.zeros(n)
    for i in range(n):
        temp[i] = y_raw[i].split('.')
        if label_type=='age':
            y_out[i] = int(temp[i][2])
        elif label_type=='sex':
            sex = temp[i][1]
            if sex == 'male':
                y_out[i] = 1
            else:
                y_out[i] = 0
        elif label_type=='wc':
            y_out[i] = int(temp[i][0])
        elif label_type=='job':
            print('not implemented yet needs more thinking first')
            y_out[i] = temp[i][3]
        elif label_type=='sign':
            print('not implemented yet needs more thinking first')
            y_out[i] = temp[i][4]

    
    return y_out



In [100]:
def loadDataBlogEntry(folderDir,k=1):
    '''
    input: 
        folderDir: directory of the data
        k: words mentioned this number of times or less will not included in the word frequency map
    outputs: 
        x: (n*(number of entries per blog) x d) posts in word frequency format
        y: (n) raw labels
        corpus: all the words used in dataset
    
    '''
    parser = etree.XMLParser(recover=True)
    
    
    
    corpus = set()
    #get the filenames in the directory
    labels = os.listdir(folderDir)
    labels = sorted(labels)
    
    n = len(labels)
    x = []
    y = []
    
    
    #go through each file and get the text
    
    #considering maybe using XML trees here in stead of doing it this way
    for i in range(n):
        
        #store the path
        path = folderDir + labels[i]
        
        
        #open the file in path, encoding to utf-8 and ignoring errors ie non utf-8 characters
        with codecs.open(path, 'r', encoding='utf-8', errors='ignore') as f:
            
            #store the file content as a string
            raw_file_data_string = f.read()
            
            #store the files in html tree format (XMl is not working because of some errors in the files)
            blog = html.fromstring(raw_file_data_string)
            
            #go through each entry in the blog and process text to become a vector
            for entry in blog:
                
                #only store the blog entries, not dates as data for the x vector
                if entry.tag == 'post':
                    entrytext = entry.text
                    
                    #stem or lemmatize the words in string file
                    processed_entry = preprocess(entrytext)
                    
                    #make a frequency map of the words in the string
                    freqs = freqMap(processed_entry,k)
                    
                    #store the frequency map of words in the x_raw vector
                    x.append(freqs)
                    
                    #store the label of the blog in the y_raw vector
                    y.append(labels[i][:-4])
        
        
        
        
        
        
        
        
        
        
        
        
    
    #create the corpus
    for blogentry in x:
        for key in blogentry.keys():
            corpus.add(key)
        
    #ensure corpus has only unique elements and is sorted
    corpus = sorted(list(corpus))
    
    return x ,y ,corpus

In [136]:
x_raw, y_raw, corpus = loadData(cwd + '/blogsSmall/',1)

In [171]:
print(len(x_raw[0]))

1008


In [167]:
x_rawposts, y_rawposts, corpus = loadDataBlogEntry(cwd + '/blogsSmall/',1)

In [170]:
print(len(x_rawposts[0]))

65


In [137]:
x = vectorizeX(x_raw,corpus)
y = vectorizeY(y_raw)

In [164]:
print(len(x))

True


In [138]:
#create test sets:
x_test_raw, y_test_raw, _ = loadData(cwd + '/blogsSmallTest/',1)
x_test = vectorizeX(x_test_raw,corpus)
y_test = vectorizeY(y_test_raw)
# print(x,y)

## You will use the regression tree from a previous project. As a reminder, the following code shows you how to instantiate a decision tree:

In [145]:
# Create a regression tree with depth = whatever I want
tree = RegressionTree(depth=10)

# To fit/train the regression tree
tree.fit(x, y)

# To use the trained regression tree to predict a score
age_Tr = tree.predict(x)

# To use the trained regression tree to predict x_test to see how it works on a test set the example

age_Te = tree.predict(x_test)


## Now run the following cell that essentially evaluate your depth-4 decision tree on the training set and test set. You might see that the difference between training error and test error is small (not overfitting) but both the errors are rather high. This is a sign of underfitting or high bias.


In [148]:
# Evaluate the depth 4 decision tree
# tr_err   = np.mean((np.sign(tree.predict(xTrSpiral)) - yTrSpiral)**2)
# te_err   = np.mean((np.sign(tree.predict(xTeSpiral)) - yTeSpiral)**2)

print("Training loss (OLS): %.4f" % np.mean((age_Tr-y)**2))
print("Testing loss (OLS):  %.4f" % np.mean((age_Te-y_test)**2))

Training loss (OLS): 0.6734
Testing loss (OLS):  6.1230


Ok I want to point out that this has taken like 20 minutes to train with a CART tree depth of 8 and the minumum number of word metions to be counted is 0 (separated by entry).  I thought this would givem more descriptive data, but it's just taking forever and there was pretty much no difference in loss between the depth 5 tree and depth 8 tree, so I am going to make the minimum number of times a word is counted higher to speed it up, and also I am going to speed up the vectorize X function and think about if the order of the x features in the vector really matters/ if the corpus needs to be in alphabetical order.

Let's see how old this classifier thinks I am:

In [161]:
cleanDoc = cleanhtml('''
Hi, My name is Jude I graduated college 2 years ago.  
then I travelled the world a bit before this damn pandemic hit.
I also worked as a software engineer, and surfed a few pro contests.

I have come along way in my journey to become a good tube rider.
I figured out what type of boards I like for tubes, and how to navigate
the different types of tubes (at least frontside).  Additionally, I 
have trained myself to surf tubes in both beach breaks and reef breaks.
The main difference between the two types being the consistency of where
the wave breaks, and what good waves look like.
''')
processed = preprocess(cleanDoc,stemLem='s')
# print(processed)
doc_raw = freqMap(processed,k=1)

x_doc = vectorizeX([doc_raw], corpus)
# print(x_doc)
# print(evalboostforest(trees, x_doc))
print(tree.predict(x_doc)[0])
print(evalboostforest(trees, x_doc))

33.0
[29.89047346]


It thinks I'm about 28 which is only 3 years off

one issue I have encountered with this classifier is with older writers as much of the training dataset I have is younger writers, with the max age being about 45.  This means the predictions aren't likely to be higher than that.

Below I am going to test it for a 38 year old and a 58 year old and see what it says:

In [162]:
testset = [doc38yo,doc58yo,"Being 40 is great because if I eat whatever I want for two weeks, I look like I'm ready to be floated out to sea."]
x_testset = []
for writing in testset:
    processed = preprocess(writing,stemLem='s')
    freqMapEntry = freqMap(processed,k=1)
    x_testset.append(freqMapEntry)
    
x_testset = vectorizeX(x_testset, corpus)
print(x_testset)
# print(evalboostforest(trees, x_doc))
print(tree.predict(x_testset))
print(evalboostforest(trees, x_testset))

  # This is added back by InteractiveShellApp.init_path()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[33.         24.57894737 33.        ]
[30.10919889 22.02474992 29.89047346]


In [153]:
def evalboostforest(trees, X, alphas=None):
    """Evaluates X using trees.
    
    Input:
        trees:  list of TreeNode decision trees of length m
        X:      n x d matrix of data points
        alphas: m-dimensional weight vector
        
    Output:
        pred: n-dimensional vector of predictions
    """
    m = len(trees)
    n,d = X.shape
    
    if alphas is None:
        alphas = np.ones(m) / len(trees)
            
    pred = np.zeros(n)
    
    # YOUR CODE HERE
    for t in range(m):
        predt = trees[t].predict(X)
#         print(alphas.shape)
#         print(predt.shape)
        pred = pred + alphas[t]*predt
        
    return pred

In [154]:
def GBRT(xTr, yTr, m, maxdepth=4, alpha=0.1):
    """Creates GBRT.
    
    Input:
        xTr:      n x d matrix of data points
        yTr:      n-dimensional vector of labels
        m:        number of trees in the forest
        maxdepth: maximum depth of tree
        alpha:    learning rate for the GBRT
        
        
    Output:
        trees: list of decision trees of length m
        weights: weights of each tree
    """
    
    n, d = xTr.shape
    trees = []
    weights = []
    
    # Make a copy of the ground truth label
    # this will be the initial ground truth for our GBRT
    # This should be updated for each iteration
    t = np.copy(yTr)
    
    
    # YOUR CODE HERE
    x = xTr
    y = yTr
    H = np.zeros(n)
    t = np.zeros(n)
    for _ in range(m):
        for i in range(n):
            t[i] = (y[i] - H[i])
        ht = RegressionTree(depth=maxdepth)
        ht.fit(x, t)
        ht_preds = ht.predict(x)
        H = H + alpha*ht_preds
        trees.append(ht)
        weights.append(alpha)

    
    return trees, weights

In [155]:
#trying out gradient boosted regression trees:

trees, weights = GBRT(x, y, 20,maxdepth=10, alpha = 1/float(len(x)))

# preds = evalboostforest(trees, x, weights)

In [159]:
preds = evalboostforest(trees, x)
preds_test = evalboostforest(trees, x_test)


In [160]:
# Evaluate the depth 4 decision tree
# tr_err   = np.mean((np.sign(tree.predict(xTrSpiral)) - yTrSpiral)**2)
# te_err   = np.mean((np.sign(tree.predict(xTeSpiral)) - yTeSpiral)**2)

print("Training loss (OLS): %.4f" % np.mean((preds-y)**2))
print("Testing loss (OLS):  %.4f" % np.mean((preds_test-y_test)**2))

Training loss (OLS): 8.5781
Testing loss (OLS):  14.9730


In [214]:
print(np.mean(preds-y))

-2.7887040837279877


0.011235955056179775

In [274]:
cleanDoc = cleanhtml(doc)
processed = preprocess(cleanDoc,stemLem='s')
# print(processed)
doc_raw = freqMap(processed,k=1)

x_doc = vectorizeX([doc_raw], corpus)
print(x_doc)
print(evalboostforest(trees, x_doc))
print(tree.predict(x_doc))


[[0. 0. 0. ... 0. 0. 0.]]
[25.16034034]
[25.]


In [275]:
cleanDoc = cleanhtml(doc2)
processed = preprocess(cleanDoc,stemLem='s')
# print(processed)
doc_raw = freqMap(processed,k=1)

x_doc = vectorizeX([doc_raw], corpus)
print(x_doc)
print(evalboostforest(trees, x_doc))
print(tree.predict(x_doc))

[[0. 0. 0. ... 0. 0. 0.]]
[25.16034034]
[25.]


  # This is added back by InteractiveShellApp.init_path()


In [278]:
cleanDoc = cleanhtml(doc4)
processed = preprocess(cleanDoc,stemLem='s')
# print(processed)
doc_raw = freqMap(processed,k=1)

x_doc = vectorizeX([doc_raw], corpus)
print(x_doc)
print(evalboostforest(trees, x_doc))
print(tree.predict(x_doc))

[[0. 0. 0. ... 0. 0. 0.]]
[25.16034034]
[25.]


  # This is added back by InteractiveShellApp.init_path()


In [279]:
cleanDoc = cleanhtml(doc5)
processed = preprocess(cleanDoc,stemLem='s')
# print(processed)
doc_raw = freqMap(processed,k=1)

x_doc = vectorizeX([doc_raw], corpus)
print(x_doc)
print(evalboostforest(trees, x_doc))
print(tree.predict(x_doc))

  # This is added back by InteractiveShellApp.init_path()


[[0. 0. 0. ... 0. 0. 0.]]
[25.16034034]
[40.]


In [109]:
 doc = """In the script above we created a dictionary called wordfreq.
    Next, we iterate through each sentence in the corpus. 
    The sentence is tokenized into words. Next, we iterate through each word in the sentence. 
    If the word doesn't exist in the wordfreq dictionary, we will add the word as the key and
    will set the value of the word as 1. Otherwise, if the word already exists in the dictionary,
    we will simply increment the key count by 1.If you are executing the above in the Spyder editor
    like me, you can go the variable explorer on the right and click wordfreq variable. You should
    see a dictionary like this:"""
    
doc2 = '''My friend Lucas Oliveira has psoriasis, which comes from or is triggered by a weak immune system.  Recently his psoriasis symptoms have been mild or almost nonexistent.  I believe that an accident might strengthen his immune system and help to curb his psoriasis symptoms in the future. 

Explaining in more detail, my shower was very dirty lately with a pink slime that was in the areas where the water was not drying quickly.  I cleaned most of it with paper towels and a spray bottle cleaner, but was unable to get the slime out of the grout.  I ended up using a toothbrush to clean the grout.  It was very effective, but after I wrapped it up in paper towels and left it on the floor for future use.  In an unfortunate confusion, Lucas ended up using the toothbrush to brush his teeth.  Although I had washed the toothbrush several times, he complained of feeling something in his throat the next day, and two days later had to leave work with a fever, a sore throat, and difficulty swallowing.  His doctor said it is a throat infection, and that he needs rest and to eat soup as it heals.  It is nothing too serious.  I also recommended that he take vitamin C to help boost his immune system, which is already weak.  I believe this infection experience will strengthen his immune system and has made me wonder if this could be a possible therapy for those with Psoriasis.

	Of course infecting people with weak immune systems has its risks, but maybe for patients with severe life-debilitating symptoms this could prove to lead to more satisfactory living.  The method of infection should be more controlled than the shower bacteria and germs that infected my poor friend Lucas, but perhaps there is a specific type of bacteria or virus that can help strengthen the body’s immune system specifically to fend off the symptoms of Psoriasis.  Also it is worth noting that this treatment or therapy would be best when the patient’s psoriasis symptoms and stress are at a minimum.  My personal concern over this issue has come from the fact that I have two friends with psoriasis, and I see the pain that it causes in their lives and I wish I could help.  Another complication worth noting is that there are medications for psoriasis from big pharma companies which I am sure would make this sort of treatment or study difficult, especially these days with anti vaxxers and other anti science movements on the rise.
'''

doc3 = "hello, I am a friendly person.  Definitely not from mars yes yes yes yes I like cows cows cows cows.  Have you ever had a milkshake before? wow wow wow wow so good good good good"

doc38yo = '''What Does it Mean to Live a Healthy Life?
This is one of the central questions that our community is working to answer.

For my part, I started asking this question when I decided that exercising and eating healthy, while important, is a painfully limited view of what it means to live a healthy life.

I would imagine that we’ll be answering this question for the rest of our lives, but here’s the answer I have today…

Creativity
A healthy life involves creativity.

I mean this in the literal sense. Creative pursuits (art, dance, music, etc.) have been proven to reduce stress and provide numerous health benefits. Furthermore, I believe these activities are good for you because it’s healthy to contribute something to the world around you.

Creativity also keeps you grounded. People who spend their life creating understand how hard it is to make something valuable. They appreciate great work. They respect the willingness to take a risk and the effort required to be part of the conversation. They live in the arena instead of judging from the crowd.

And perhaps most importantly: It is through the act of creating new experiences that we discover who we are and what is important to us. The act of creating things is one of the best ways to avoid living a short, unimportant life.

The joy of creativity is why I love photography and why I'm working to become better at it.

Physicality
Nobody dreams about typing at a keyboard.

Almost everything we fantasize about involves a physical action: making the game winning shot, traveling the world, creating beautiful art. To accomplish the things we dream about, we are required to be makers, explorers, and creators. We are required to move and become active participants in life. In other words, to live fulfilling lives, we need to live physical lives.

Too often, however, we take for granted the opportunity that our health gives us to live a life that’s true to us. Good health allows you to actively participate in your life. Not everyone chooses to live fully, but if you’re not physically fit then you can’t even make the choice if you wanted to do so.

And finally, one of the best ways I’ve found for improving my mind is by improving my body. Not every person who is strong mentally pushes themselves physically. But I have yet to find someone who pushes themselves physically that isn’t strong mentally.

The benefits of living a physical life is why I believe in strength training and why I'm working to become the type of person who never misses a workout.

Connection
Human beings are the most social creatures on planet earth. Fundamentally, we are here for connection. So, I have trouble believing that you are living a healthy life if it doesn't involve others.

Furthermore, the people you connect with will either make you or destroy you. Those who have already walked through the fire can help you do the same. And those who haven’t will make it seem impossible for you to do so.

Exploration
To be curious is to be human. Imagine the first year — or even five years — of your life. Almost everything you are exposed to is new. Each day brings something to discover, something to learn, something to explore.

And then somehow, as the years roll by, we grow up and start to transition. We begin to learn things by reading about them and watching them instead of by experiencing them.

When we are babies, we learn about the world by touching it, holding it, and living it … and not merely reading about it. Sure, we will eat a stray LEGO block every now and then, but we make it past the occasional misstep and end up better off. I believe there is value in experiencing the world around us instead of simply learning about it.

These wonders of explorations are why I travel around the world in search of new cultures, new ideas, and new technologies. My hope is to learn things by living them, and to do my best to spread those lessons far and wide. The best ideas are always somewhere. My goal is to share them everywhere.

Participation
Living a healthy life is rooted in participating in the world around you. The people who are living healthy, fulfilling lives are actively living instead of simply being alive.

The famous physicist Richard Feynman summarizes this idea perfectly:

“Fall in love with some activity, and do it! Nobody ever figures out what life is all about, and it doesn't matter. Explore the world. Nearly everything is really interesting if you go into it deeply enough. Work as hard and as much as you want to on the things you like to do the best. Don't think about what you want to be, but what you want to do. Keep up some kind of a minimum with other things so that society doesn't stop you from doing anything at all.”

—Richard P. Feynman

Go Live a Healthy Life
Here in the United States, I’m getting ready to celebrate Christmas with my family.

I’ll be doing my best to create (by taking photos, of course), live a physical life (I’ll still be lifting during the holidays), connecting with family and friends, exploring the world by experiencing it (the experience of opening some presents, perhaps?), and participating in life.

Happy Holidays. Wherever you are in the world and whatever you’re celebrating, live well.'''

doc58yo = '''n the early days of the run-up to the 2016 election, I was just beginning to prepare a class on whiteness to teach at Yale University, where I had been newly hired. Over the years, I had come to realize that I often did not share historical knowledge with the persons to whom I was speaking. “What’s redlining?” someone would ask. “George Washington freed his slaves?” someone else would inquire. But as I listened to Donald Trump’s inflammatory rhetoric during the campaign that spring, the class took on a new dimension. Would my students understand the long history that informed a comment like one Trump made when he announced his presidential candidacy? “When Mexico sends its people, they’re not sending their best,” he said. “They’re sending people that have lots of problems, and they’re bringing those problems with us. They’re bringing drugs. They’re bringing crime. They’re rapists.” When I heard those words, I wanted my students to track immigration laws in the United States. Would they connect the treatment of the undocumented with the treatment of Irish, Italian and Asian people over the centuries?

In preparation, I needed to slowly unpack and understand how whiteness was created. How did the Naturalization Act of 1790, which restricted citizenship to “any alien, being a free white person,” develop over the years into our various immigration acts? What has it taken to cleave citizenship from “free white person”? What was the trajectory of the Ku Klux Klan after its formation at the end of the Civil War, and what was its relationship to the Black Codes, those laws subsequently passed in Southern states to restrict black people’s freedoms? Did the United States government bomb the black community in Tulsa, Okla., in 1921? How did Italians, Irish and Slavic peoples become white? Why do people believe abolitionists could not be racist?

I wanted my students to gain an awareness of a growing body of work by sociologists, theorists, historians and literary scholars in a field known as “whiteness studies,” the cornerstones of which include Toni Morrison’s “Playing in the Dark: Whiteness and the Literary Imagination,” David Roediger’s “The Wages of Whiteness,” Matthew Frye Jacobson’s “Whiteness of a Different Color: European Immigrants and the Alchemy of Race,” Richard Dyer’s “White” and more recently Nell Irvin Painter’s “The History of White People.” Roediger, a historian, had explained the development of the field, one that my class would engage with, saying, “The 1980s and early ’90s saw the publication of major works on white identity’s intricacies and costs by James Baldwin and Toni Morrison, alongside new works by white writers and activists asking similar questions historically. Given the seeming novelty of such white writing and the urgency of understanding white support for Ronald Reagan, ‘critical whiteness studies’ gained media attention and a small foothold in universities.” This area of study aimed to make visible a history of whiteness that through its association with “normalcy” and “universality” masked its omnipresent institutional power.

My class eventually became Constructions of Whiteness, and over the two years that I have taught it, many of my students (who have included just about every race, gender identity and sexual orientation) interviewed white people on campus or in their families about their understanding of American history and how it relates to whiteness. Some students simply wanted to know how others around them would define their own whiteness. Others were troubled by their own family members’ racism and wanted to understand how and why certain prejudices formed. Still others wanted to show the impact of white expectations on their lives.

Perhaps this is why one day in New Haven, staring into the semicircle of oak trees in my backyard, I wondered what it would mean to ask random white men how they understood their privilege. I imagined myself — a middle-aged black woman — walking up to strangers and doing so. Would they react as the police captain in Plainfield, Ind., did when his female colleague told him during a diversity-training session that he benefited from “white male privilege”? He became angry and accused her of using a racialized slur against him. (She was placed on paid administrative leave, and a reprimand was placed permanently in her file.) Would I, too, be accused? Would I hear myself asking about white male privilege and then watch white man after white man walk away as if I were mute? Would they think I worked for Trevor Noah or Stephen Colbert and just forgot my camera crew? The running comment in our current political climate is that we all need to converse with people we don’t normally speak to, and though my husband is white, I found myself falling into easy banter with all kinds of strangers except white men. They rarely sought me out to shoot the breeze, and I did not seek them out. Maybe it was time to engage, even if my fantasies of these encounters seemed outlandish. I wanted to try.

Weeks later, it occurred to me that I tend to be surrounded by white men I don’t know when I’m traveling, caught in places that are essentially nowhere: in between, en route, up in the air. As I crisscrossed the United States, Europe and Africa giving talks about my work, I found myself considering these white men who passed hours with me in airport lounges, at gates, on planes. They seemed to me to make up the largest percentage of business travelers in the liminal spaces where we waited. That I was among them in airport lounges and in first-class cabins spoke in part to my own relative economic privilege, but the price of my ticket, of course, does not translate into social capital. I was always aware that my value in our culture’s eyes is determined by my skin color first and foremost. Maybe these other male travelers could answer my questions about white privilege. I felt certain that as a black woman, there had to be something I didn’t understand.

The election. And its impact on you.
Special offer: Subscribe for $1 a week.
Just recently, a friend who didn’t get a job he applied for told me that as a white male, he was absorbing the problems of the world. He meant he was being punished for the sins of his forefathers. He wanted me to know he understood it was his burden to bear. I wanted to tell him that he needed to take a long view of the history of the workplace, given the imbalances that generations of hiring practices before him had created. But would that really make my friend feel any better? Did he understand that today, 65 percent of elected officials are white men, though they make up only 31 percent of the American population? White men have held almost all the power in this country for 400 years.

[The grief that white Americans can’t share.]

I knew that my friend was trying to communicate his struggle to find a way to understand the complicated American structure that holds us both. I wanted to ask him if his expectation was a sign of his privilege but decided, given the loss of his job opportunity, that my role as a friend probably demanded other responses.

After a series of casual conversations with my white male travelers, would I come to understand white privilege any differently? They couldn’t know what it’s like to be me, though who I am is in part a response to who they are, and I didn’t really believe I understood them, even as they determined so much of what was possible in my life and in the lives of others. But because I have only lived as me, a person who regularly has to negotiate conscious and unconscious dismissal, erasure, disrespect and abuse, I fell into this wondering silently. Always, I hesitated.

Editors’ Picks

Scientists Destroyed a Nest of Murder Hornets. Here’s What They Learned.

Savannah Guthrie Is Feeling Lucky

I’m Tired of Babysitting Man-Babies at Work
Continue reading the main story
I hesitated when I stood in line for a flight across the country, and a white man stepped in front of me. He was with another white man. “Excuse me,” I said. “I am in this line.” He stepped behind me but not before saying to his flight mate, “You never know who they’re letting into first class these days.”

Was his statement a defensive move meant to cover his rudeness and embarrassment, or were we sharing a joke? Perhaps he, too, had heard the recent anecdote in which a black woman recalled a white woman’s stepping in front of her at her gate. When the black woman told her she was in line, the white woman responded that it was the line for first class. Was the man’s comment a sly reference? But he wasn’t laughing, not even a little, not even a smile. Deadpan.

Later, when I discussed this moment with my therapist, she told me that she thought the man’s statement was in response to his flight mate, not me. I didn’t matter to him, she said; that’s why he could step in front of me in the first place. His embarrassment, if it was embarrassment, had everything to do with how he was seen by the person who did matter: his white male companion. I was allowing myself to have too much presence in his imagination, she said. Should this be a comfort? Was my total invisibility preferable to a targeted insult?


During the flight, each time he removed or replaced something in his case overhead, he looked over at me. Each time, I looked up from my book to meet his gaze and smiled — I like to think I’m not humorless. I tried to imagine what my presence was doing to him. On some level, I thought, I must have dirtied up his narrative of white privilege securing white spaces. In my class, I had taught “Whiteness as Property,” an article published in The Harvard Law Review in 1993, in which the author, Cheryl Harris, argues that “the set of assumptions, privileges and benefits that accompany the status of being white have become a valuable asset that whites sought to protect.” These are the assumptions of privilege and exclusion that have led many white Americans to call the police on black people trying to enter their own homes or vehicles. Racial profiling becomes another sanctioned method of segregating space. Harris goes on to explain how much white people rely on these benefits, so much so that their expectations inform the interpretations of our laws. “Stand your ground” laws, for example, mean whites can claim that fear made them kill an unarmed black person. Or voter-registration laws in certain states can function as de facto Jim Crow laws. “American law,” Harris writes, “has recognized a property interest in whiteness.”

On the plane, I wanted to enact a new narrative that included the whiteness of the man who had stepped in front of me. I felt his whiteness should be a component of what we both understood about him, even as his whiteness would not be the entirety of who he is. His unconscious understanding of whiteness meant the space I inhabited should have been only his. The old script would have left his whiteness unacknowledged in my consideration of his slight. But a rude man and a rude white man have different presumptions. Just as when a white person confronted by an actual black human being needs to negotiate stereotypes of blackness so that he can arrive at the person standing before him, I hoped to give the man the same courtesy but in the reverse. Seeing his whiteness meant I understood my presence as an unexpected demotion for him. It was too bad if he felt that way. Still, I wondered, what is this “stuckness” inside racial hierarchies that refuses the neutrality of the skies? I hoped to find a way to have this conversation.

[Sign up for Race/Related, a weekly newsletter focused on race, identity and culture.]

The phrase “white privilege” was popularized in 1988 by Peggy McIntosh, a Wellesley College professor who wanted to define “invisible systems conferring dominance on my group.” McIntosh came to understand that she benefited from hierarchical assumptions and policies simply because she was white. I would have preferred if instead of “white privilege” she had used the term “white dominance,” because “privilege” suggested hierarchical dominance was desired by all. Nonetheless, the phrase has stuck. The title of her essay “White Privilege and Male Privilege: A Personal Account of Coming to See Correspondences Through Work in Women’s Studies” was a mouthful. McIntosh listed 46 ways white privilege is enacted. “Number 19: I can speak in public to a powerful male group without putting my race on trial”; “Number 20: I can do well in a challenging situation without being called a credit to my race”; “Number 27: I can go home from most meetings of organizations I belong to feeling somewhat tied in, rather than isolated, out-of-place, outnumbered, unheard, held at a distance or feared”; “Number 36: If my day, week or year is going badly, I need not ask of each negative episode or situation whether it has racial overtones.” I’m not clear why McIntosh stopped at 46 except as a way of saying, “You get the picture.” My students were able to add their own examples easily.

My students and I also studied the work of the white documentary filmmaker Whitney Dow. In the last couple of years, Dow has been part of Columbia University’s Interdisciplinary Center for Innovative Theory and Empirics (Incite), which gathered data on more than 850 people who identify as white or partly white and the communities in which they live. He filmed more than a hundred of their oral histories. This work, like McIntosh’s, was another way of thinking about the ordinariness of white hierarchical thinking. I asked Dow what he learned in his conversations with white men. “They are struggling to construct a just narrative for themselves as new information comes in, and they are having to restructure and refashion their own narratives and coming up short,” he said. “I include myself in that,” he added after a moment. “We are seeing the deconstruction of the white-male archetype. The individual actor on the grand stage always had the support of a genocidal government, but this is not the narrative we grew up with. It’s a challenge to adjust.”

The interviews, collected in Incite’s initial report, “Facing Whiteness,” vary greatly in terms of knowledge of American history and experiences. One interviewee declares: “The first slave owner in America was a black man. How many people know that? The slaves that were brought to America were sold to the white man by blacks. So, I don’t feel that we owe them any special privileges other than that anybody else has, any other race.” While this interviewee denies any privilege, another has come to see how his whiteness enables his mobility in America: “I have to accept the reality that because I’m a man, I — whether I was aware of that or not at any specific time — probably had some sort of hand up in a situation.” He added, “The longer I’m in law enforcement and the more aware I am of the world around me, the more I realize that being of Anglo-Saxon descent, being a man and being in a region of America that is somewhat rural, and because it’s rural by default mostly white, means that I definitely get preference.” This interviewee, who while recognizing his privilege, and who according to Whitney Dow had been “pretty ostracized because of his progressiveness” in the workplace, still indicates — through his use of words like “probably” and phrases like “because it’s rural by default mostly white” — that he believes white privilege is in play in only certain circumstances. Full comprehension would include the understanding that white privilege comes with expectations of protection and preferences no matter where he lives in the country.

[How privilege became a provocation.]

How angry could I be at the white man on the plane, the one who glanced at me each time he stood up the way you look at a stone you had tripped on? I understood that the man’s behavior was also his socialization. My own socialization had, in many ways, prepared me for him. I was not overwhelmed by our encounter because my blackness is “consent not to be a single being.” This phrase, which finds its origins in the work of the West Indian writer Édouard Glissant but was reintroduced to me in the recent work of the poet and critical theorist Fred Moten, gestures toward the fact that I can refuse the white man’s stereotypes of blackness, even as he interacts with those stereotypes. What I wanted was to know what the white man saw or didn’t see when he walked in front of me at the gate.

It’s hard to exist and also accept my lack of existence. Frank Wilderson III, chair of African-American studies at the University of California, Irvine, borrows the sociological term “social death” to explain my there-but-not-there status in a historically anti-black society. The outrage — and if we are generous, the embarrassment — that occasioned the white passenger’s comment were a reaction to the unseen taking up space; space itself is one of the understood privileges of whiteness.

I was waiting in another line for access to another plane in another city as another group of white men approached. When they realized they would have to get behind a dozen or so people already in line, they simply formed their own line next to us. I said to the white man standing in front of me, “Now, that is the height of white male privilege.” He laughed and remained smiling all the way to his seat. He wished me a good flight. We had shared something. I don’t know if it was the same thing for each of us — the same recognition of racialized privilege — but I could live with that polite form of unintelligibility.

I found the suited men who refused to fall in line exhilarating and amusing (as well as obnoxious). Watching them was like watching a spontaneous play about white male privilege in one act. I appreciated the drama. One or two of them chuckled at their own audacity. The gate agent did an interesting sort of check-in by merging the newly formed line with the actual line. The people in my line, almost all white and male themselves, were in turn quizzical and accepting.

After I watched this scene play out, I filed it away to use as an example in my class. How would my students read this moment? Some would no doubt be enraged by the white female gate agent who let it happen. I would ask why it was easier to be angry with her than with the group of men. Because she doesn’t recognize or utilize her institutional power, someone would say. Based on past classes, I could assume the white male students would be quick to distance themselves from the men at the gate; white solidarity has no place in a class that sets out to make visible the default positions of whiteness.

As the professor, I felt this was a narrative that could help me gauge the level of recognition of white privilege in the class, because other white people were also inconvenienced by the actions of this group of men. The students wouldn’t be distracted by society’s abuse of minorities because everyone seemed inconvenienced. Some students, though, would want to see the moment as gendered, not racialized. I would ask them if they could imagine a group of black men pulling off this action without the white men in my line responding or the gate agent questioning the men even if they were within their rights.

As I became more and more frustrated with myself for avoiding asking my question, I wondered if presumed segregation in business or first class should have been Number 47 on McIntosh’s list. Just do it, I told myself. Just ask a random white guy how he feels about his privilege.


ImageI myself am overdetermined by my race. Is that avoidable? Is that a problem? Had I made the problem or was I given the problem?
I myself am overdetermined by my race. Is that avoidable? Is that a problem? Had I made the problem or was I given the problem?Credit...Photo illustration by Najeebah Al-Ghadban
On my next flight, I came close. I was a black woman in the company of mostly white men, in seats that allowed for both proximity and separate spaces. The flight attendant brought drinks to everyone around me but repeatedly forgot my orange juice. Telling myself orange juice is sugar and she might be doing my post-cancer body a favor, I just nodded when she apologized for the second time. The third time she walked by without the juice, the white man sitting next to me said to her: “This is incredible. You have brought me two drinks in the time you have forgotten to bring her one.”

She returned immediately with the juice.

I thanked him. He said, “She isn’t suited to her job.” I didn’t respond: “She didn’t forget your drinks. She didn’t forget you. You are seated next to no one in this no place.” Instead, I said, “She just likes you more.” He perhaps thought I was speaking about him in particular and blushed. Did he understand I was joking about white male privilege? It didn’t seem so. The red crept up his neck into his cheeks, and he looked shy and pleased at the same time. He brought both hands up to his cheeks as if to hold in the heat of this embarrassing pleasure.

“Coming or going?” he asked, changing the subject.

“I’m returning from Johannesburg.”

“Really?” he said. “I was just in Cape Town.”

Hence your advocacy, I thought ungenerously. Why was that thought in my head? I myself am overdetermined by my race. Is that avoidable? Is that a problem? Had I made the problem or was I given the problem?

As I looked at the man in Seat 2B, I wondered if my historical positioning was turning his humanity into evidence of white male dominance. Are white men overly determined by their skin color in my eyes? Are they being forced, as my friend suggested, to absorb the problems of the world?

On the long flight, I didn’t bring up white male privilege, jokes or otherwise, again. Instead we wandered around our recent memories of South Africa and discussed the resort where he stayed and the safari I took. I didn’t bring up Soweto or the Apartheid Museum that I visited in Johannesburg or the lynching memorial in Montgomery, Ala., which the Apartheid Museum reminded me of. I wanted my fellow traveler to begin a conversation about his privilege this time. For once. I wanted him to think about his whiteness, especially because he had just left South Africa, a country that suffered, as James Baldwin said, “from the same delusion the Americans suffer from — it too thought it was a white country.” But I imagined he felt the less said about race relations in the United States or South Africa, the more possible it was for us to be interlocutors. That was my fantasy, in any case.

Back home, when I mentioned these encounters to my white husband, he was amused. “They’re just defensive,” he said. “White fragility,” he added, with a laugh. This white man who has spent the past 25 years in the world alongside me believes he understands and recognizes his own privilege. Certainly he knows the right terminology to use, even when these agreed-upon terms prevent us from stumbling into moments of real recognition. These phrases — white fragility, white defensiveness, white appropriation — have a habit of standing in for the complicated mess of a true conversation. At that moment, he wanted to discuss our current president instead. “That,” he said, “is a clear case of indignation and rage in the face of privilege writ large. Real power. Real consequences.” He was not wrong, of course, but he joined all the “woke” white men who set their privilege outside themselves — as in, I know better than to be ignorant or defensive about my own privilege. Never mind that that capacity to set himself outside the pattern of white male dominance is the privilege. There’s no outrunning the kingdom, the power and the glory.

I finally got up my nerve to ask a stranger directly about white privilege as I was sitting next to him at the gate. He had initiated our conversation, because he was frustrated about yet another delay. We shared that frustration together. Eventually he asked what I did, and I told him that I write and teach. “Where do you teach?” he asked. “Yale,” I answered. He told me his son wanted to go there but hadn’t been accepted during the early-application process. “It’s tough when you can’t play the diversity card,” he added.

Was he thinking out loud? Were the words just slipping out before he could catch them? Was this the innocence of white privilege? Was he yanking my chain? Was he snapping the white-privilege flag in my face? Should I have asked him why he had the expectation that his son should be admitted early, without delay, without pause, without waiting? Should I have asked how he knew a person of color “took” his son’s seat and not another white son of one of these many white men sitting around us?

I was perhaps holding my breath. I decided to just breathe.

“The Asians are flooding the Ivy Leagues,” he added after a moment. Perhaps the clarification was intended to make it clear that he wasn’t speaking right now about black people and their forms of affirmative action. He had remembered something. He had recalled who was sitting next to him.

[50 years of affirmitive action: what went right and what it got wrong?]

Then I did it. I asked. “I’ve been thinking about white male privilege, and I wonder if you think about yours or your son’s?” It almost seemed to be a non sequitur, but he rolled with it.

“Not me,” he said. “I’ve worked hard for everything I have.”

What was it that Justice Brett Kavanaugh said at his Supreme Court confirmation hearing? “I got into Yale Law School. That’s the No. 1 law school in the country. I had no connections there. I got there by busting my tail in college.” He apparently believed this despite the fact that his grandfather went to Yale. I couldn’t tell by looking at this man I was sitting next to, but I wondered if he was an ethnic white rather than a white Anglo-Saxon Protestant. The historian Matthew Frye Jacobson, in “Whiteness of a Different Color,” describes “the 20th century’s reconsolidating of the 19th century’s ‘Celts, Slavs, Hebrews and Mediterraneans.’ ” By the 1940s, according to David Roediger, “given patterns of intermarriage across ethnicity and Cold War imperatives,” whites stopped dividing hierarchically within whiteness and begin identifying as socially constructed Caucasians.


I said to the man, “What if I said I wasn’t referring to generations of economic wealth, to Mayflower wealth and connections?” I asked him if he gets flagged when he passes through T.S.A. “Not usually,” he said. “I have Global Entry.”

“So do I,” I said, “but I still get stopped.” The “randomness” of racial profiling is a phenomenon I could talk about forever, but I stopped myself that day. “Are you able to move in and out of public spaces without being questioned as to why you are there?” I asked. “Do people rush forward asking how they can help you?” I knew the answer to my question, but I asked it anyway, because I wanted to slow down a dynamic he benefited from.

He said he saw my point. I wanted to say, “It’s not my point, it’s your reality,” but the declarative nature of the sentence felt sharp on my tongue. I wanted to keep talking with this man, and I knew my race and gender meant he was wary of me and my questions — questions that might lead to the word “racist” or “sexist.” If only skin color didn’t have such predictive power.

I didn’t want our different historical positioning derailing our already strained chat. I wanted to learn something that surprised me about this stranger, something I couldn’t have known beforehand. Then it hit me. There wasn’t enough time to develop trust, but everyone likes a listener. “Coming or going?” is the traveler’s neutral, nonprying question. So now I asked him. He was heading home.

The word “home” turned him back to his son. He said his son’s best friend was Asian and had been admitted to Yale on early action or early decision or early admissions. Neither of us knew the terminology. I wondered how he comforted his son. Had he used “the diversity card” as he had with me? I didn’t want to discuss college-admissions policy anymore. I wanted our conversation to go down any other road, but I had somehow become a representative of Yale, not a stranger sitting next to another stranger.

I reminded myself that I was there only to listen. Just listen. The man was deeply earnest and obviously felt helpless about the uncertainty of his son’s future. But it couldn’t be too dismal if Yale was still an option. Don’t think, I reminded myself. Know what it is to parent. Know what it is to love. Know what it is to be white. Know what it is to expect what white people have always had. Know what it is to resent. Is that unfair? Resentment has no home here. Know what it is to be white. Is that ungenerous? I don’t know. Don’t think.

I didn’t ask this white man why he thought his son was any more entitled to a place at Yale than his son’s Asian friend. I didn’t want him to feel he needed to defend his son’s worth or his son’s intelligence to me. I wanted his son to thrive. I did. Were his son to arrive in my class, I would help him do his best. The more he achieved at Yale, the more pleased I would be for both of us. If his son told the class he got into Yale because many of his white teachers from kindergarten on exaggerated his intelligence, I would interrupt him, as I have done in the past, and say, “No, you got into Yale, and you have the capacity to understand that many factors contributed to your acceptance.”

College-admissions processes can’t be discussed in definitive ways; they’re full of gray areas, and those gray areas are often white-leaning, even as plenty of whites are denied entrance. We know that. I was suddenly reluctant to have a conversation about white-perceived spaces and entitlement or, God forbid, affirmative action, which would of course flood the space between us with black and brown people, me included. I said instead, “Wherever your son goes will work out, and in five years none of this will matter.” It was in this moment that I recognized my exhaustion. And then came the realization that we were, in fact, in the midst of a discussion about the perceived loss of white male privilege. Was I implicated in his loss? Did he think so?

Not long after this, I was on another flight and sitting next to a white man who felt as if he could already be a friend. Our conversation had the ease of kicking a ball around on a fall afternoon. Or it felt like stepping out the door in late spring when suddenly the temperature inside and out reads the same on your skin. Resistance falls away; your shoulders relax. I was, metaphorically, happily outdoors with this man, who was open and curious with a sense of humor. He spoke about his wife and son with palpable affection. And though he was with me on the plane, he was there with them as well. His father was an academic, his mother a great woman.

He asked who my favorite musician was, and I told him the Commodores because of one song, “Nightshift,” which is basically an elegy. He loved Bruce Springsteen, but “Nightshift” was also one of his favorite songs. We sang lyrics from “Nightshift” together: “I still can hear him say, ‘Aw, talk to me so you can see what’s going on.’ ” When he asked if I knew a certain song by Springsteen, I admitted I didn’t. I could only think of “American Skin (41 Shots)”: “No secret, my friend, you can get killed just for living in your American skin.” I knew those lyrics, but I didn’t start singing them. I made a mental note to check out the Springsteen song he loved.

Eventually, he told me he had been working on diversity inside his company. “We still have a long way to go,” he said. Then he repeated himself — “We still have a long way to go” — adding, “I don’t see color.” This is a statement for well-meaning white people whose privilege and blind desire catapult them into a time when little black children and little white children are judged not “by the color of their skin but by the content of their character.” The phrase “I don’t see color” pulled an emergency brake in my brain. Would you be bringing up diversity if you didn’t see color? I wondered. Will you tell your wife you had a nice talk with a woman or a black woman? Help.

All I could think to say was, “Ain’t I a black woman?” I asked the question slowly, as if testing the air quality. Did he get the riff on Sojourner Truth? Or did he think the ungrammatical construction was a sign of blackness? Or did he think I was mocking white people’s understanding of black intelligence? “Aren’t you a white man?” I then asked. “Can’t you see that? Because if you can’t see race, you can’t see racism.” I repeated that sentence, which I read not long before in Robin DiAngelo’s “White Fragility.”

ADVERTISEMENT

Continue reading the main story
“I get it,” he said. His tone was solemn. “What other inane things have I said?”

“Only that,” I responded.

I had refused to let the reality he was insisting on be my reality. And I was pleased that I hadn’t lubricated the moment, pleased I could say no to the silencing mechanisms of manners, pleased he didn’t need to open up a vein of complaint. I was pleased he was not passively bullying. I was pleased he could carry the disturbance of my reality. And just like that, we broke open our conversation — random, ordinary, exhausting and full of a shared longing to exist in less segregated spaces.'''