In [None]:
import gensim.downloader as api
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Name of the pretrained embedding requested from gensim's downloader API.
EMBEDDING_NAME = "word2vec-google-news-300"

# `model` is what every later cell indexes (`model[word]`) and tests
# membership against (`word in model`).
model = api.load(EMBEDDING_NAME)

In [None]:
# Per-decade vocabulary: maps a decade label to a list of lowercase tokens.
# Later cells look each token up in `model` and average the vectors that are
# found, so tokens missing from the embedding vocabulary are silently ignored.
# NOTE(review): several entries (e.g. "ut", "wos", "py", "ti", "af", "sn")
# look like bibliographic-export field tags rather than content words —
# confirm whether they should be filtered out before averaging.
decades = {
    "1960s": ["ut","h","marcus","di","py","institute","may","social","quarterly","r","ep","mp","study","open","taeuber","journal","relation","vl","holden","deakin","wos","ti","ke","n","bp","santacruz","american","pt","affair","stegman","education","racial","kaplan","b","housing","planner","schoop","space","ej","af","au","er","policy","urban","brook","j","sn","raza"],
    "1970s": ["ep","j","bp","wos","vl","er","ut","pt","py","ti","af","urban","racial","di","sn","au","education","journal","change","study","school","university","detroit","l","law","ei","regional","anthropology","housing","segregation","dl","jm","dh","review","jf","e","ri","b","anonymous","science","american","g","residential","white","economics","neighborhood","rh","new","r","lawyer"],
    "1980s": ["er","af","sn","pd","ti","au","urban","ut","pt","vl","py","wos","bp","ep","j","race","di","study","review","class","journal","c","r","housing","affair","ei","neighborhood","economic","politics","education","jm","economics","political","k","weekly","l","segregation","galster","change","dec","planning","jt","association","league","e","anonymous","international","v","jun","research"],
    "1990s": ["housing","racial","planning","ti","change","af","difference","household","urban","policy","spatial","use","research","new","social","ab","segregation","group","discrimination","paper","public","state","city","result","american","study","model","article","class","characteristic","white","minority","economic","black","area","environmental","diversity","journal","community","evidence","ethnic","neighborhood","provide","income","concern","pattern","level","increase","sn","hispanic"],
    "2000s": ["public","racial","research","planning","article","housing","journal","ti","minority","development","policy","change","use","neighborhood","social","group","urban","park","american","study","new","diversity","space","community","af","ab","ethnic","city","pattern","economic","state","regional","ri","program","also","analysis","planner","literature","result","income","area","case","paper","segregation","oi","examines","local","place","work","association"],
    "2010s": ["american","planning","ti","journal","racial","research","housing","segregation","af","city","urban","new","regional","association","class","ab","ri","black","education","international","study","change","practice","neighborhood","use","state","minority","public","development","result","article","group","income","literature","white","analysis","also","sn","show","homeownership","increase","african","zone","oi","equity","pd","space","policy","understand","social"],
    "2020s": ["planning","journal","ti","af","american","black","research","practice","urban","planner","study","literature","community","association","experience","white","theory","oi","policy","justice","minority","education","racial","ab","united","housing","neighborhood","equity","ri","strategy","inequality","segregation","sn","problem","ethnic","analysis","finding","development","use","article","address","within","state","opportunity","ei","space","examine","among","youth","regional"]
}
# The "1960s" entry actually covers everything before 1970; it is labeled as
# a decade only for convenience.

In [None]:
# Decade labels in the order they were inserted into `decades`; the
# adjacent-decade comparison walks this list pairwise.
decade_keys = list(decades)

# Filled in by the comparison loop: "<decade> → <next decade>" -> similarity
# score (or None when a score could not be computed).
similarity_results = dict()

In [None]:
# Compare every decade with the one that follows it: average the word vectors
# of each decade's tokens and take the cosine similarity of the two averages.
for earlier, later in zip(decade_keys, decade_keys[1:]):
    # Only tokens present in the embedding vocabulary contribute a vector.
    earlier_vectors = [model[token] for token in decades[earlier] if token in model]
    later_vectors = [model[token] for token in decades[later] if token in model]

    if not (earlier_vectors and later_vectors):
        # At least one decade has no usable token, so no score is defined.
        pair_score = None
    else:
        # cosine_similarity expects 2-D input, hence the (1, n_features) reshape.
        earlier_mean = np.mean(earlier_vectors, axis=0).reshape(1, -1)
        later_mean = np.mean(later_vectors, axis=0).reshape(1, -1)
        pair_score = cosine_similarity(earlier_mean, later_mean)[0][0]

    similarity_results[f"{earlier} → {later}"] = pair_score

In [None]:
# Report the cosine similarity stored for each adjacent-decade transition.
for transition, score in similarity_results.items():
    # The comparison loop stores None when a decade has no in-vocabulary
    # tokens; formatting None with ".3f" raises TypeError, so show a
    # placeholder for that case instead.
    score_text = "n/a" if score is None else f"{score:.3f}"
    print(f"Semantic shift from {transition}: {score_text}")

In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [None]:
# One mean embedding per decade, built from the tokens the model knows.
# Decades with no in-vocabulary token get no entry at all.
decade_vectors = {}
for decade, words in decades.items():
    known_vectors = [model[token] for token in words if token in model]
    if not known_vectors:
        continue
    decade_vectors[decade] = np.mean(known_vectors, axis=0)

In [None]:
# Stack the per-decade mean vectors into one row per decade (same order as
# `decade_vectors`) and project them onto their first two principal components.
decade_matrix = np.array(list(decade_vectors.values()))
pca = PCA(n_components=2)
coords = pca.fit_transform(decade_matrix)

In [None]:
# Scatter the decades in the 2-D PCA plane, one labeled point per decade.
# Rows of `coords` are in the same order as the keys of `decade_vectors`.
fig, ax = plt.subplots(figsize=(6, 4))
for decade, (x, y) in zip(decade_vectors.keys(), coords):
    ax.scatter(x, y, label=decade)
    # Offset the label in screen points rather than data units so the spacing
    # does not depend on the scale of the PCA coordinates.
    ax.annotate(decade, (x, y), xytext=(3, 3), textcoords="offset points", fontsize=6)

# Label the axes and title the figure so it can be read on its own.
ax.set_xlabel("PC 1")
ax.set_ylabel("PC 2")
ax.set_title("Decade-average word vectors (PCA projection)")
plt.show()