# NASA acronyms

#### What, if anything, can we learn from NASA's acronyms?

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt

In [3]:
import json
from pathlib import Path

import altair as alt
import altair_latimes as lat
import numpy as np

In [4]:
# Register the altair_latimes theme under one name, then make it the active theme
LATIMES_THEME = "latimes"
alt.themes.register(LATIMES_THEME, lat.theme)
alt.themes.enable(LATIMES_THEME)

ThemeRegistry.enable('latimes')

In [5]:
# Use Altair's "png" renderer for chart output in this notebook
alt.renderers.enable("png")

RendererRegistry.enable('png')

In [6]:
# Data source: https://github.com/nasa/NASA-Acronyms

### Fetch data

In [7]:
# Raw JSON file from the nasa/NASA-Acronyms repository (master branch)
ACRONYMS_URL = (
    "https://raw.githubusercontent.com/nasa/NASA-Acronyms/master/acronyms.json"
)
df = pd.read_json(ACRONYMS_URL)

In [8]:
# Number of acronym records loaded
df.shape[0]

42527

In [9]:
# Peek at the first five records to see which columns are available
df.head(n=5)

Unnamed: 0,abbreviation,source_id,acronym_id,expansion,source
0,(E)CCU,8,0,(EUS)Camera Controller Unit,MAF-SMA
1,(E)FILMRS,8,1,(EUS)Flight Imaging Launch Monitoring Real-Tim...,MAF-SMA
2,(V)OMR,8,2,(Vehicle) Operations and Maintenance Requirements,MAF-SMA
3,0-G,2,0,Zero Gravity,LaRC
4,1FT,8,535,One Fault Tolerant,MAF-SMA


### Get the first character of the acronym

In [10]:
# Strip everything except ASCII letters so the first character is always a letter.
# regex=True must be explicit: since pandas 2.0 the default is a literal match,
# which would leave abbreviations such as "(E)CCU" or "0-G" unchanged.
df["abbreviation_clean"] = df["abbreviation"].str.replace(
    "[^a-zA-Z]", "", regex=True
)

In [11]:
# Upper-cased first letter of each cleaned abbreviation
# (missing when the cleaned abbreviation is empty)
first_letters = df["abbreviation_clean"].str.get(0)
df["first_character"] = first_letters.str.upper()

### Group by the first characters

In [12]:
# Share of acronyms starting with each letter, largest first.
# `normalize` is a boolean flag; pass it by keyword rather than relying on a
# truthy string in the first positional slot.
df["first_character"].value_counts(normalize=True)

S    0.115012
C    0.087617
P    0.077999
M    0.074731
A    0.073602
I    0.057823
E    0.055354
T    0.048653
D    0.047524
R    0.045713
F    0.044584
L    0.041528
N    0.034920
O    0.033556
H    0.031534
G    0.031228
V    0.022457
B    0.022316
U    0.014485
W    0.013662
J    0.007972
K    0.007454
Q    0.005338
X    0.002540
Z    0.001340
Y    0.001058
Name: first_character, dtype: float64

In [13]:
# Count acronyms per first letter, as a two-column frame for charting
letter_sizes = df.groupby(["first_character"]).size()
characters = letter_sizes.reset_index(name="count")

In [14]:
# Same count, broken out by the source that contributed each acronym
letter_source_sizes = df.groupby(["first_character", "source"]).size()
characters_sources = letter_source_sizes.reset_index(name="count")

In [15]:
# Check the shape of the per-letter, per-source counts
characters_sources.head(n=5)

Unnamed: 0,first_character,source,count
0,A,AES,3
1,A,EGS,114
2,A,Earthdata,57
3,A,GSFC-Prop,22
4,A,HSF,952


### Chart the characters

In [16]:
# Bar layer: one bar per first letter, bar height = number of acronyms
letter_axis = alt.X("first_character", title="Character")
count_axis = alt.Y("count", title=" ")
bars = alt.Chart(characters).mark_bar().encode(x=letter_axis, y=count_axis)

# Label layer: reuse the bar encoding and draw each count just above its bar
count_label = alt.Text("count:Q", format=",.0f")
text = bars.mark_text(align="center", baseline="middle", dy=-10).encode(
    text=count_label
)

In [17]:
# Layer the count labels over the bars and size/title the combined chart
labeled_bars = bars + text
labeled_bars.properties(
    width=800, height=400, title="First characters of NASA abbreviations"
)

--- 

## Exports

In [18]:
# Write the enriched table to disk. Create the export folder first so the cell
# also succeeds on a fresh checkout where "output/" does not exist yet.
export_path = Path("output/nasa_acronyms.csv")
export_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(export_path, index=False)