# Scrape Morse Code from a table
Created by Taishi Nammoto


In [1]:
import pandas as pd

In [2]:
# Page that hosts the Morse code tables.
url = 'http://www.sckans.edu/~sireland/radio/code.html'
# read_html yields a list of DataFrames, which later cells index as dfs[0] and dfs[1].
dfs = pd.read_html(io=url)

In [3]:
# List the column labels of every scraped table.
# The loop variable is deliberately not called `df`: that name holds the
# working frame in later cells, and re-running this cell would overwrite it
# with the last scraped table.
for table in dfs:
    print(table.columns)

Index(['Letter', 'Morse', 'NATO', 'English', 'American', 'Italian', 'German',
       'International'],
      dtype='object')
Index(['Number', 'Code', 'Punctuation', 'Code.1'], dtype='object')


In [4]:
# Preview the first five rows of the first scraped table
dfs[0].head(n=5)

Unnamed: 0,Letter,Morse,NATO,English,American,Italian,German,International
0,A,*-,Alfa,Andrew,Able,Ancona,Anton,Amsterdam
1,B,-***,Bravo,Benjamin,Baker,Bologna,Berta,Baltimore
2,C,-*-*,Charlie,Charlie,Charlie,Como,Casar,Casablanca
3,D,-**,Delta,David,Dog,Domodossola,Dora,Denmark
4,E,*,Echo,Edward,Easy,Empoli,Emil,Edision


In [5]:
# Preview the first five rows of the second scraped table
dfs[1].head(n=5)

Unnamed: 0,Number,Code,Punctuation,Code.1
0,1,*----,Period,*-*-*-
1,2,**---,Comma,--**--
2,3,***--,Colon,---***
3,4,****-,Question Mark,**--**
4,5,*****,Apostrophe,*----*


In [6]:
# Keep only the Letter and Morse columns of the first table
df = dfs[0].loc[:, ['Letter', 'Morse']]
df.head(n=5)

Unnamed: 0,Letter,Morse
0,A,*-
1,B,-***
2,C,-*-*
3,D,-**
4,E,*


In [7]:
# Take the punctuation half of the second table and give it the same
# column names (Letter, Morse) as the letters frame
df_punc = dfs[1][['Punctuation', 'Code.1']].rename(
    columns={'Punctuation': 'Letter', 'Code.1': 'Morse'}
)
df_punc

Unnamed: 0,Letter,Morse
0,Period,*-*-*-
1,Comma,--**--
2,Colon,---***
3,Question Mark,**--**
4,Apostrophe,*----*
5,Hyphen,-****-
6,Fraction Bar,-**-*
7,Parentheses,-*--*-
8,Quotation Marks,*-**-*
9,,


In [8]:
# Take the digit half of the second table and give it the same
# column names (Letter, Morse) as the letters frame
df_num = dfs[1][['Number', 'Code']].rename(
    columns={'Number': 'Letter', 'Code': 'Morse'}
)
df_num

Unnamed: 0,Letter,Morse
0,1,*----
1,2,**---
2,3,***--
3,4,****-
4,5,*****
5,6,-****
6,7,--***
7,8,---**
8,9,----*
9,0,-----


In [9]:
# Punctuation names mapped to their symbols (important symbols only)
punc_dict = {'Period':'.', 'Comma':',', 'Question Mark':'?'}

# Drop the rows with missing values, then swap each punctuation name for its
# symbol; names that are not keys of punc_dict come back from map() as NaN
df_punc = (
    df_punc
    .dropna()
    .assign(Letter=lambda frame: frame['Letter'].map(punc_dict))
)
df_punc['Letter']

0      .
1      ,
2    NaN
3      ?
4    NaN
5    NaN
6    NaN
7    NaN
8    NaN
Name: Letter, dtype: object

In [10]:
# Discard every row that still has a missing value, i.e. the punctuation
# names that had no symbol in punc_dict
df_punc = df_punc.dropna(axis='index', how='any')
df_punc

Unnamed: 0,Letter,Morse
0,.,*-*-*-
1,",",--**--
3,?,**--**


In [11]:
# Stack letters, digits and punctuation into one frame with a fresh 0..n-1 index
frames = [df, df_num, df_punc]
df = pd.concat(frames, ignore_index=True)

In [12]:
# Confirm that no missing values remain: count them per column
df.isna().sum(axis='index')

Letter    0
Morse     0
dtype: int64

In [13]:
# Turn every asterisk in the Morse strings into a period in one vectorized pass.
# Series.replace('*', '.') only swaps cells whose entire value equals '*', so
# the substring-level Series.str.replace is the right tool; regex=False makes
# '*' a literal character instead of a regex quantifier.
# Assigning the whole column back also avoids the chained assignment
# df['Morse'][i] = ..., which raises SettingWithCopyWarning and leaves df
# unchanged when pandas Copy-on-Write is enabled.
df['Morse'] = df['Morse'].str.replace('*', '.', regex=False)

In [14]:
df.head()

Unnamed: 0,Letter,Morse
0,A,.-
1,B,-...
2,C,-.-.
3,D,-..
4,E,.


In [15]:
# Write the combined table to disk. index=True (the pandas default) also
# writes the row index as the first, unnamed column of the CSV.
# NOTE(review): assumes the Data/ directory already exists - confirm.
df.to_csv(path_or_buf='Data/morse_data.csv', index=True)