In [1]:
import sys
# Add the parent directory to the module search path so that the `src`
# package imported below can be found.
# NOTE(review): ".." is resolved against the current working directory, so
# this presumably expects the notebook to be run from a sub-folder of the
# project root — confirm.
sys.path.append("..")

from src.webapp_utility import Loader

import warnings
# Blanket filter: every warning category is suppressed from here on.
warnings.filterwarnings('ignore')

In [2]:
# Shared Loader instance used by every cell below. Per the log printed under
# this cell, construction loads the count vectorizers, the full and small LDA
# models, and the word embeddings.
l = Loader()

Loading full count vectorizers... Done
Loading full lda model... Done
Loading small lda model... Done
Loading word embeddings... Done


### Word frequency

In [3]:
# Frequency series for the two query words. The recorded output holds one
# (year, value) pair per 10-year step, matching interval=10.
l.get_freq_distribution(
    ["gun", "robbery"],
    interval=10,
)

[(1880, 0.00013719303059404582),
 (1890, 8.091269520187717e-05),
 (1900, 0.00022119000221190003),
 (1910, 0.00039039625219597894),
 (1920, 0.007709251101321586),
 (1930, 0.010703363914373088),
 (1940, 0.006501472031403337),
 (1950, 0.009668508287292817),
 (1960, 0.02900030634126417),
 (1970, 0.04859773964001674),
 (1980, 0.04818485185559713),
 (1990, 0.043245287776587024),
 (2000, 0.040356083086053415),
 (2010, 0.051215277777777776)]

In [4]:
# Same query for a single word with a coarser step: the recorded output holds
# one (year, value) pair every 20 years.
l.get_freq_distribution(
    ["cocaine"],
    interval=20,
)

[(1880, 5.0895765472312706e-05),
 (1900, 0.0005184570717544588),
 (1920, 0.00011130073458484825),
 (1940, 0.00020452686119443688),
 (1960, 0.003354808063414779),
 (1980, 0.04609859431553548),
 (2000, 0.08035872846741254)]

### Embeddings

In [5]:
# Ask for the 3 nearest neighbours of "gun" using model_type="full" (no year).
# NOTE(review): the recorded output for this call is an empty list, whereas
# the "one" and "ten" calls below return three (word, score) pairs — confirm
# whether model_type="full" is supported / loaded, or whether [] is expected.
l.get_n_similar(word="gun", n=3, model_type="full")

[]

In [6]:
# Three nearest neighbours of "gun" for year 2000 with model_type="one";
# the recorded output is a list of (word, score) pairs.
l.get_n_similar(
    word="gun",
    n=3,
    model_type="one",
    year=2000,
)

[('knife', 0.8641738891601562),
 ('handgun', 0.8565401434898376),
 ('shotgun', 0.8561012148857117)]

In [7]:
# Same neighbour query as the previous cell, but with model_type="ten".
l.get_n_similar(
    word="gun",
    n=3,
    model_type="ten",
    year=2000,
)

[('handgun', 0.8175308704376221),
 ('pistol', 0.7670312523841858),
 ('shotgun', 0.7456485033035278)]

### Topics

In [8]:
# Topic distribution for a single word under the "small" model; the recorded
# output maps topic id (0-13) to a numeric weight.
l.get_topic_dist(
    ["gun"],
    model="small",
)

{0: 0.44,
 1: 0.338,
 2: 21.027,
 3: 48.96,
 4: 0.002,
 5: 0.015,
 6: 0.146,
 7: 0.238,
 8: 2.236,
 9: 0.0,
 10: 11.288,
 11: 0.058,
 12: 0.001,
 13: 15.251}

In [9]:
# Topic distribution for two words at once under the "big" model.
# NOTE(review): "big" presumably refers to the "full lda model" reported in
# the Loader log — confirm the naming.
l.get_topic_dist(
    ["gun", "cocaine"],
    model="big",
)

{0: 43.027,
 1: 0.0,
 2: 2.257,
 3: 0.0,
 4: 0.004,
 5: 0.0,
 6: 0.0,
 7: 0.062,
 8: 0.009,
 9: 0.0,
 10: 0.0,
 11: 0.001,
 12: 0.0,
 13: 54.641}

In [10]:
# Top 5 words per topic for the "big" model; the recorded output maps each
# topic id to a list of (word, weight) pairs.
l.get_topics_words(n=5, model="big")
# To inspect the other model instead: l.get_topics_words(n=5, model="small")

{0: [('testify', 0.25),
  ('people', 0.201),
  ('testimony', 0.195),
  ('jury', 0.179),
  ('witness', 0.174)],
 1: [('agreement', 0.231),
  ('fee', 0.229),
  ('award', 0.183),
  ('petitioner', 0.18),
  ('attorney', 0.177)],
 2: [('sentence', 0.26),
  ('people', 0.251),
  ('counsel', 0.169),
  ('offense', 0.162),
  ('charge', 0.158)],
 3: [('board', 0.298),
  ('commission', 0.237),
  ('employee', 0.166),
  ('claimant', 0.153),
  ('decision', 0.146)],
 4: [('respondent', 0.417),
  ('statement', 0.149),
  ('information', 0.148),
  ('testify', 0.145),
  ('mental', 0.14)],
 5: [('plaintiff', 0.582),
  ('motion', 0.158),
  ('complaint', 0.123),
  ('rule', 0.07),
  ('dismiss', 0.066)],
 6: [('property', 0.279),
  ('city', 0.222),
  ('plaintiff', 0.207),
  ('use', 0.162),
  ('lease', 0.13)],
 7: [('child', 0.319),
  ('section', 0.211),
  ('school', 0.193),
  ('petition', 0.141),
  ('board', 0.137)],
 8: [('plaintiff', 0.325),
  ('car', 0.207),
  ('jury', 0.164),
  ('injury', 0.163),
  ('neglig

In [14]:
# Date distribution for topic 0; only the first five entries are shown
# (each recorded entry is a [year, value] pair).
l.get_topics_date_distribution(topic=0)[:5]

[[1770, 0.1767345440458309],
 [1815, 0.0006601829626994073],
 [1820, 0.0215818322676799],
 [1825, 0.008537321355360113],
 [1830, 0.00585200424269593]]

In [12]:
# Textual description of topic 0.
# NOTE(review): the recorded output ('Topic 0 description') looks like a
# placeholder string — confirm whether real descriptions are expected here.
l.get_topics_description(topic=0)

'Topic 0 description'

### Semantic similarity over time

In [13]:
# Semantic data for "cocaine" relative to base year 2010. The result is a
# dict; its keys are printed first, then the "one_year" series is displayed.
# NOTE(review): the recorded series contains -1 for years before 1974 —
# presumably a sentinel for "no value available for that year"; confirm.
c = l.get_semantic_data(
    "cocaine",
    base_year=2010,
)
print(c.keys())
c["one_year"]

dict_keys(['one_year', 'ten_year'])


[(2010, 1.0000001),
 (2009, 0.8796336),
 (2008, 0.89783674),
 (2007, 0.8537258),
 (2006, 0.85475296),
 (2005, 0.87689704),
 (2004, 0.85835254),
 (2003, 0.8532518),
 (2002, 0.84030765),
 (2001, 0.86791134),
 (2000, 0.83747065),
 (1999, 0.87141716),
 (1998, 0.822067),
 (1997, 0.8828443),
 (1996, 0.862307),
 (1995, 0.85867),
 (1994, 0.7618349),
 (1993, 0.743306),
 (1992, 0.70073444),
 (1991, 0.7308293),
 (1990, 0.7397585),
 (1989, 0.7238263),
 (1988, 0.75458986),
 (1987, 0.79445523),
 (1986, 0.7920416),
 (1985, 0.76328516),
 (1984, 0.7869816),
 (1983, 0.77198064),
 (1982, 0.8190136),
 (1981, 0.66725534),
 (1980, 0.6760835),
 (1979, 0.64976573),
 (1978, 0.7396287),
 (1977, 0.625686),
 (1976, 0.693496),
 (1975, 0.7067592),
 (1974, 0.5828098),
 (1973, -1),
 (1972, -1),
 (1971, -1),
 (1970, -1),
 (1969, -1),
 (1968, -1),
 (1967, -1),
 (1966, -1),
 (1965, -1),
 (1964, -1),
 (1963, -1),
 (1962, -1),
 (1961, -1),
 (1960, -1),
 (1959, -1),
 (1958, -1),
 (1957, -1),
 (1956, -1),
 (1955, -1),
 (195