# Exploration: Mining Data

This notebook reflects an exploration into several mining data sources: 
1. USGS - US Monitored Active Mines and Mineral Plants
    - Original data is a shapefile.
    - Source: https://mrdata.usgs.gov/mineplant/
    - Expect data to contain a wide variety of mine and mineral types
    - Likely categorical data only. (TODO: the original note "This data acts as ..." was left unfinished; complete it.)
2. Uranium Mine Data 
    - Original data is a shapefile.
    - Source: Spladder

In [25]:
#Libraries: 
#Basic py: 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import os

# Show up to 50 columns when a wide DataFrame is displayed.
pd.set_option("display.max_columns", 50)

## Import the Data

In [32]:
path = './Mining/'
data_dict = {}

# Load every comma-separated .txt export found in `path` into a dictionary
# keyed by '<file stem>_df'.
# - os.path.splitext is used instead of str.split('.') so that entries
#   without an extension (e.g. sub-folders) are skipped rather than raising
#   IndexError, and only names that really end in '.txt' are read.
# - sorted() makes the load order deterministic; os.listdir returns entries
#   in arbitrary order.
for file_name in sorted(os.listdir(path)):
    stem, extension = os.path.splitext(file_name)
    if extension != '.txt':
        continue
    data_dict[stem + '_df'] = pd.read_csv(os.path.join(path, file_name),
                                          sep=",", header=0)

# Bind the individual dataframes to short names used by the rest of the
# notebook. A KeyError here means the corresponding .txt file is missing
# from `path`.
uran_bound_df = data_dict['Uranium_Boundaries_List_df']
uran_pprod_df = data_dict['Uranium_Past_Producers_df']
uran_perm_df = data_dict['Utah_Uranium_Permits_df']
asb_df = data_dict['Asbestos_fUS49_UT_df']

In [43]:
# Convert cells that are empty or contain only whitespace into NaN so they
# are counted as missing, then list the number of missing values per column
# in ascending order.
blank_cell_pattern = r'^\s*$'
uran_pprod_df = uran_pprod_df.replace(
    to_replace=blank_cell_pattern, value=np.nan, regex=True
)
missing_per_column = uran_pprod_df.isna().sum().sort_values()
print(missing_per_column.to_string())

FID              0
STATUS           0
UTM_Z            0
UTM_E            0
UTM_N            0
Q2_SCALE         0
REF              0
PROD            26
COMMOD          26
COUNTY          26
STATE           26
COUNTRY         26
ZID             26
REC_NO          26
REC_TYPE        26
REP_DATE        27
MERIDIAN        32
REP             34
SITE_NAME       37
RANGE           39
TOWNSHIP        39
FIL_LINK        84
SECTION        101
REP_AFF        112
RF1            214
ACC            260
DIST           265
POSITION       367
HR_TYPE        460
HR_AGE         477
LONG           478
LAT            478
PHYS           479
DRAIN          480
ELEV           483
NAT_DISC       496
DEP_NUM        497
DEP_TYPE       504
FORM_NAME      518
FORM_AGE       524
LOCATION       525
LAND_ST        528
DESC_WORK      530
DEP_SIZE       538
TECT_SET       549
MAJOR          575
DEP_FORM       577
ORE_MAT        590
QUAD2          610
DEP_T_U        611
M_T_U          639
MAX_THICK      640
LEN_WK      

In [44]:
# Preview a small set of columns from the past-producers table.
preview_columns = ['FID', 'CP_YEAR', 'TOWNSHIP', 'SITE_NAME', 'LAT', 'LONG']
uran_pprod_df[preview_columns]

Unnamed: 0,FID,CP_YEAR,TOWNSHIP,SITE_NAME,LAT,LONG
0,0,PRE-1956; .1,013S,YELLOW BIRD CLAIM,39-42-20N,113-35-03W
1,1,,040S,BULLOCH CLAIM,37-20-30N,112-51-30W
2,2,,043S,SKYLINE ROAD,37-01-07N,110-15-36W
3,3,,043S,TOM HOLLIDAY PROSPECT,37-01-07N,110-15-36W
4,4,,043S,MONUMENT 3,37-01-05N,110-19-13W
...,...,...,...,...,...,...
1211,1211,,,,,
1212,1212,,,,,
1213,1213,,,,,
1214,1214,,,,,


In [46]:
# (rows, columns) of the past-producers table.
uran_pprod_df.shape

(1216, 153)

In [54]:
# Keep only the rows that have a value in CP_YEAR / TOWNSHIP respectively,
# and report how many rows survive each filter.
has_cp_year = uran_pprod_df['CP_YEAR'].notna()
has_township = uran_pprod_df['TOWNSHIP'].notna()

uran_pprod_years = uran_pprod_df[has_cp_year]
uran_pprod_township = uran_pprod_df[has_township]

for subset in (uran_pprod_years, uran_pprod_township):
    print(subset.shape)

(497, 153)
(1177, 153)


In [63]:
# Frequency of each TOWNSHIP value among the rows that have a CP_YEAR.
uran_pprod_years['TOWNSHIP'].value_counts()

024S       76
023S       51
035S       50
032S       42
031S       32
037S       29
033S       23
022S       23
021S       20
025S       20
034S       19
036S       19
026S       14
030S       12
027S       12
020S        7
029S        7
028S        4
013S        3
039S        3
040S        3
043S        3
009S        2
038S        2
018S        2
031S 03     1
029S 02     1
033S 03     1
024S 02     1
041S        1
/24S        1
026S 02     1
035S 03     1
032S 03     1
032E        1
042S        1
006S;       1
Name: TOWNSHIP, dtype: int64

## Conclusion: This data needs quite a bit of cleaning. The export is far from perfect (e.g. the TOWNSHIP counts above include malformed values such as `/24S`, `032E` and `006S;`).

In [66]:
# Display the asbestos table loaded from 'Asbestos_fUS49_UT'.
asb_df

Unnamed: 0,FID,rec_id,state,county,site_name,devel,latitude,longitude,oremin,assocmin,hostrock,source
0,0,393,UT,Beaver,Big Pass group,occurrence,38.303799,-112.8216,"short-fiber chrysotile (asbestos)"""", fibrous t...","garnet, epidote, wollastonite, fluorite, pyrit...",calcite-wollastonite marble,ofr20081095
1,1,394,UT,Beaver,King David mine,occurrence,38.451599,-113.2778,"asbestos""""""""","alunite, marble [calc-silicate minerals], arge...",marble,ofr20081095
2,2,395,UT,Millard,Tremolite no. 1 mine,past producer,38.581501,-112.8409,mass-fiber tremolite asbestos,"calcite, """"potash"""", malachite, chalcopyrite, ...",marble (dolomitic),ofr20081095
3,3,396,UT,Duchesne,unnamed occurrences in Avintaquin Canyon,occurrence,39.979999,-110.86,asbestiform magnesioarfvedsonite,"dolomite, quartz, hornblende, plagioclase","dolomitic marlstone (""""""""oil shale"""""""")",ofr20081095
4,4,397,UT,Salt Lake,Bingham stock,occurrence,40.516998,-112.157,"asbestos"""", finely fibrous actinolite","numerous minerals reported--see Hunt (1924), L...",dolomitic copper skarn,ofr20081095
5,5,398,UT,Davis,occurrences in Bair Canyon,occurrence,41.041,-111.859,"slip-fiber tremolite asbestos, fibrous actinolite","microcline feldspar, quartz, actinolite, mica,...",pegmatite sill,ofr20081095
6,6,399,UT,Box Elder,Pack Rat tremolite prospect,past prospect,41.199402,-113.3375,tremolite asbestos,dolomite,dolomitic marble,ofr20081095


In [1]:
#This is a test for github
# NOTE(review): this cell uses `np`, which is only imported in the libraries
# cell at the top of the notebook; run that cell first or this line raises
# NameError on a fresh kernel.
yikes = np.array(1)

NameError: name 'np' is not defined