In [98]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline
# Render floats with thousands separators and 10 decimal places.
pd.options.display.float_format = '{:,.10f}'.format

# Matplotlib 3.6 renamed its bundled seaborn styles from 'seaborn-*' to
# 'seaborn-v0_8-*' and later releases dropped the old names entirely.
# Use whichever spelling this installation provides so the cell runs on
# both old and new Matplotlib versions.
available_styles = set(plt.style.available)
plot_styles = []
for legacy_style in ('seaborn-white', 'seaborn-colorblind'):
    renamed_style = legacy_style.replace('seaborn-', 'seaborn-v0_8-', 1)
    plot_styles.append(renamed_style if renamed_style in available_styles else legacy_style)
# colorblind safe; listed last so it is applied after the seaborn styles
plot_styles.append('tableau-colorblind10')
# A list of styles is applied from first to last, same as separate use() calls.
plt.style.use(plot_styles)

# width and precision for f strings
width = 10
precision = 4

# default sizes for plots
# https://matplotlib.org/3.3.0/tutorials/introductory/customizing.html#customizing-with-matplotlibrc-files
plt.rcParams.update({
    'figure.figsize': [10, 6],
    'font.size': 16,
    'legend.fontsize': 'large',
    'figure.titlesize': 'medium',
    'lines.linewidth': 2,
})

# other settings
# pd.set_option("display.precision", 3)
# np.set_printoptions(precision=3, suppress=True)
# Load the autoreload extension and use mode 2 so edited modules are
# re-imported automatically before code is executed.
# NOTE: if the extension is already loaded, %load_ext only prints a reminder
# to use %reload_ext instead; the message is harmless.
%load_ext autoreload
%autoreload 2
# Do not truncate the column list when displaying wide DataFrames.
pd.set_option('display.max_columns', None)
# Turn on IPython's greedy tab-completion mode.
%config IPCompleter.greedy=True

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [99]:
# setup dir and import helper functions
import sys, os
# Append "<parent of sys.path[0]>/src" to the module search path; this must run
# before the helper_funcs import below so that module can be located.
# NOTE(review): assumes sys.path[0] is the notebook's own directory — confirm.
# If sys.path[0] is '' this resolves to the relative path 'src' instead.
sys.path.append(os.path.join(os.path.dirname(sys.path[0]),'src'))
import helper_funcs as my_funcs
import re

In [100]:
# Root folder that is walked recursively for .png image files.
# Defaults to the original local checkout path; set the CAMPSITE_IMAGE_DIR
# environment variable to point the notebook at the data on another machine.
directory = os.environ.get(
    'CAMPSITE_IMAGE_DIR',
    '/Users/pault/Desktop/github/CampsitePredict/data/symlink_data/only_unaugmented',
)

In [101]:
# Collect the base name (".png" stripped) of every original image under
# `directory`, in the order os.walk yields them.
augmented_suffix = re.compile('rot[0-9]{2,3}.png$')
filelist = [
    png_name[:-4]  # drop the trailing ".png"
    for _folder, _subfolders, names in os.walk(directory, followlinks=False)
    for png_name in names
    # keep PNGs only, and skip augmented copies whose names end in rot<digits>.png
    if png_name.endswith(".png") and not augmented_suffix.search(png_name)
]

In [102]:
# One-column frame of the collected names (the column label defaults to 0).
file_df = pd.DataFrame(data=filelist)

In [103]:
# Peek at the first ten collected names to sanity-check the filename parsing.
filelist[:10]

['satimg_CO__352_Wild Camping_17_38.98102_-107.32651',
 'satimg_ID_7863_Wild Camping_17_43.149667_-111.052531',
 'satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999',
 'satimg_CO__216_Wild Camping_17_39.337122_-107.660378',
 'satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278',
 'satimg_MI_6491_Wild Camping_17_46.76277_-85.02438000000002',
 'satimg_UT_2806_Wild Camping_17_37.84661_-111.428193',
 'satimg_AK_12112_Wild Camping_17_63.887170999999995_-149.348656',
 'satimg_OR_6000_Wild Camping_17_44.413897_-120.495699',
 'satimg_PA_2971_Wild Camping_17_41.430395_-78.883376']

In [104]:
# Wrap the names in a single 'filename' column; the default integer index
# follows the list order, i.e. the order in which the files were collected.
image_file_df = pd.DataFrame({'filename': filelist})

In [105]:
# Display the frame (pandas abbreviates the middle rows) to check its size and contents.
image_file_df

Unnamed: 0,filename
0,satimg_CO__352_Wild Camping_17_38.98102_-107.32651
1,satimg_ID_7863_Wild Camping_17_43.149667_-111.052531
2,satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999
3,satimg_CO__216_Wild Camping_17_39.337122_-107.660378
4,satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278
...,...
7850,satimg_UT_1580_Established Campground_17_38.482453_-109.741828
7851,satimg_FL_3132_Established Campground_17_25.849862_-80.989081
7852,satimg_TN_3372_Established Campground_17_35.613972_-88.040368
7853,satimg_SD_2626_Established Campground_17_44.361324_-97.13078


In [106]:
def func(f, n=4):
    """Extract truncated latitude/longitude strings from a row's filename.

    The last two underscore-separated pieces of ``f['filename']`` are taken
    as latitude and longitude. Each is truncated (not rounded) to at most
    ``n`` digits after the decimal point; the whole-number part, including
    any minus sign, is always kept intact.

    Parameters
    ----------
    f : mapping or pandas.Series
        Row-like object with a ``'filename'`` string entry.
    n : int, default 4
        Maximum number of characters kept after each ``'.'``.

    Returns
    -------
    dict
        ``{'lat_from_file': <str>, 'long_from_file': <str>}``.

    Raises
    ------
    KeyError
        If ``f`` has no ``'filename'`` entry.
    IndexError
        If the filename contains no ``'_'`` (only one piece is available).
    """
    def _truncate(coord):
        # Clip only what follows a '.'; clipping the whole-number part as well
        # would turn e.g. '-107' into '-1' for n < 4.
        whole, *fractions = coord.split('.')
        return '.'.join([whole] + [frac[:n] for frac in fractions])

    result = [_truncate(coord) for coord in f['filename'].split('_')[-2:]]
    return {'lat_from_file': result[0], 'long_from_file': result[1]}

In [107]:
# Apply func row by row and expand each returned dict into its own columns
# (lat_from_file, long_from_file).
filename_rows = image_file_df[['filename']]
latlongs = filename_rows.apply(func, axis=1, result_type='expand')

In [108]:
# Attach the parsed coordinates column-wise (rows are aligned on the index).
# Any lat/long columns left over from an earlier run of this cell are dropped
# first, so re-running the cell does not append duplicate columns.
image_file_df = pd.concat(
    [image_file_df.drop(columns=latlongs.columns, errors='ignore'), latlongs],
    axis=1,
)

In [109]:
# Record each row's index value in an explicit 'order' column.
image_file_df = image_file_df.assign(order=image_file_df.index)

In [87]:
# long = list(image_file_df['long_from_file'].values)

In [110]:
# Show the first 13 rows to verify the parsed lat/long columns against the filenames.
image_file_df.head(13)

Unnamed: 0,filename,lat_from_file,long_from_file,order
0,satimg_CO__352_Wild Camping_17_38.98102_-107.32651,38.981,-107.3265,0
1,satimg_ID_7863_Wild Camping_17_43.149667_-111.052531,43.1496,-111.0525,1
2,satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999,35.2375,-102.8349,2
3,satimg_CO__216_Wild Camping_17_39.337122_-107.660378,39.3371,-107.6603,3
4,satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278,34.1692,-110.7942,4
5,satimg_MI_6491_Wild Camping_17_46.76277_-85.02438000000002,46.7627,-85.0243,5
6,satimg_UT_2806_Wild Camping_17_37.84661_-111.428193,37.8466,-111.4281,6
7,satimg_AK_12112_Wild Camping_17_63.887170999999995_-149.348656,63.8871,-149.3486,7
8,satimg_OR_6000_Wild Camping_17_44.413897_-120.495699,44.4138,-120.4956,8
9,satimg_PA_2971_Wild Camping_17_41.430395_-78.883376,41.4303,-78.8833,9


In [111]:
# Number of distinct filenames (missing values, if any, count as one value).
image_file_df['filename'].nunique(dropna=False)

7855

In [112]:
# Summarise row count, non-null counts and dtypes of every column.
image_file_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7855 entries, 0 to 7854
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   filename        7855 non-null   object
 1   lat_from_file   7855 non-null   object
 2   long_from_file  7855 non-null   object
 3   order           7855 non-null   int64 
dtypes: int64(1), object(3)
memory usage: 245.6+ KB


In [113]:
# Show full cell contents (no "..." truncation) so long filenames are readable.
# The fully qualified option key is used because pandas resolves abbreviated
# keys by pattern matching, which can break if a similarly named option is
# added in a later release.
pd.set_option('display.max_colwidth', None)
# Inspect a single row (position 79) in full.
image_file_df.iloc[79]

filename          satimg_CA_4982_Wild Camping_17_33.75881500000001_-118.14552
lat_from_file                                                         33.7588
long_from_file                                                      -118.1455
order                                                                      79
Name: 79, dtype: object

In [114]:
# Build "<lat>_<long>" for every row and store it as 'latlong_test' on a new
# frame, leaving image_file_df itself unchanged.
lat_text = image_file_df['lat_from_file'].astype('str')
long_text = image_file_df['long_from_file'].astype('str')
image_file_df2 = image_file_df.assign(latlong_test=lat_text.str.cat(long_text, sep='_'))

In [115]:
# Check the first rows of the new frame, including the combined latlong_test column.
image_file_df2.head()

Unnamed: 0,filename,lat_from_file,long_from_file,order,latlong_test
0,satimg_CO__352_Wild Camping_17_38.98102_-107.32651,38.981,-107.3265,0,38.9810_-107.3265
1,satimg_ID_7863_Wild Camping_17_43.149667_-111.052531,43.1496,-111.0525,1,43.1496_-111.0525
2,satimg_TX_6214_Wild Camping_17_35.2375_-102.83496099999999,35.2375,-102.8349,2,35.2375_-102.8349
3,satimg_CO__216_Wild Camping_17_39.337122_-107.660378,39.3371,-107.6603,3,39.3371_-107.6603
4,satimg_AZ_6033_Wild Camping_17_34.169239000000005_-110.794278,34.1692,-110.7942,4,34.1692_-110.7942


In [116]:
# Re-inspect the row at position 79, now with its latlong_test value.
image_file_df2.iloc[79]

filename          satimg_CA_4982_Wild Camping_17_33.75881500000001_-118.14552
lat_from_file                                                         33.7588
long_from_file                                                      -118.1455
order                                                                      79
latlong_test                                                33.7588_-118.1455
Name: 79, dtype: object

In [117]:
# Persist the frame as CSV. The index is written too (the pandas default, made
# explicit here), so it appears as an unnamed first column in the file.
image_file_df2.to_csv('../data/image_file_dfNEW.csv', index=True)