To pair satellite images with PM and Met, this script requires:
- CSV with location names and coordinates 
- Folder with all locations' PM in seperate .xls files. .xls files should be named after location
- CSV with meteorology

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals


# Common imports
import numpy as np
import pandas as pd
import os
# to make this notebook's output stable across runs
np.random.seed(42)

import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

import rasterio
from rasterio.merge import merge
from rasterio.plot import show

import json
from shapely.geometry import shape
from shapely.ops import transform as stransform
from rasterio.mask import mask
from affine import Affine
from os import listdir
from os.path import isfile, join
from tqdm import tqdm
import pyproj
import pickle as pkl
import geojson
import cv2

from rasterio.warp import transform as rtransform
from rasterio.transform import rowcol

import PIL
from PIL import Image, ImageEnhance
import sys
# from skimage import io

os.getcwd()

'/Users/sarah/Satellite_Image_Processing'

In [2]:
# #coordinates
my_coordinates = pd.read_csv('../Desktop/DHAKA.csv')
my_coordinates

Unnamed: 0,SITE,LAT,LON
0,Dhaka,23.796725,90.42202


In [3]:
PM25_dict = {}
for file in sorted(listdir('../Documents/Kath_PM/Dhaka')):  #[1:] because MacOS has a weird first output
    print(file)
    if '.xls' in file:
        site_name = file.split('.xls')[0]
#         print(site_name)
        site_coordinate = (my_coordinates[my_coordinates.SITE == site_name].values[0,1],\
                            my_coordinates[my_coordinates.SITE == site_name].values[0,2])
        print(site_coordinate)
        try:
            df = pd.read_excel(join('../Documents/Kath_PM/Dhaka', file))
            df['DATE'] = pd.to_datetime(df['DATE'],dayfirst=True) 
            df = df.set_index(pd.DatetimeIndex(df['DATE']))
            df_PM25 = df['PM25'].to_frame().dropna()
            
            
            if len(df_PM25)>0:
                PM25_dict[site_name] = {'PM25':df_PM25, 'coordinate':site_coordinate}
       
        except Exception as e:
            print(e.args)

Dhaka.csv
Dhaka.xls
(23.796725, 90.42201999999999)


In [4]:
PM25_dict

{'Dhaka': {'PM25':             PM25
  DATE            
  2017-01-01   249
  2017-01-02   234
  2017-01-03   165
  2017-01-04   152
  2017-01-05   224
  ...          ...
  2021-12-27   195
  2021-12-28   193
  2021-12-29    82
  2021-12-30   136
  2021-12-31   103
  
  [1680 rows x 1 columns],
  'coordinate': (23.796725, 90.42201999999999)}}

In [5]:
meteorology_raw_concat_no_NA_daily_avg_final = pd.read_csv('../Downloads/dhaka_met.csv')
meteorology_raw_concat_no_NA_daily_avg_final['DATE'] = pd.to_datetime(meteorology_raw_concat_no_NA_daily_avg_final['DATE'])
meteorology_raw_concat_no_NA_daily_avg_final = meteorology_raw_concat_no_NA_daily_avg_final.set_index(pd.DatetimeIndex(meteorology_raw_concat_no_NA_daily_avg_final['DATE']))

meteorology_raw_concat_no_NA_daily_avg_final.drop(['DATE'], axis = 1, inplace = True)

meteorology_raw_concat_no_NA_daily_avg_final

Unnamed: 0_level_0,TEMP,VISIB,WDSP,RH
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-01,68.5,1.1,0.8,70.0
2017-01-02,70.2,1.4,1.1,69.0
2017-01-03,70.0,1.5,3.5,69.0
2017-01-04,69.2,1.9,6.1,64.0
2017-01-05,66.9,0.9,5.6,81.0
...,...,...,...,...
2021-12-27,71.7,1.9,0.2,64.0
2021-12-28,71.3,1.8,0.8,65.0
2021-12-29,70.9,1.8,1.4,65.0
2021-12-30,71.1,2.0,3.3,64.0


In [6]:
def PM_Meteo_Matcher(Processed_PM_dfs_concat, coords_AQM):
    #load PM2.5
    Processed_PM_dfs_concat = Processed_PM_dfs_concat.apply(lambda x: pd.to_numeric(x))
    
    cur_lat, cur_long = coords_AQM 
#     print(Processed_PM_dfs_concat, cur_lat, cur_long)
    #sort meteorology stations based on their distances to the AQM station
    #sort default in ascending order
    #meteo_stations_closest_in_order = [station_id for station_id ,_, _ in \
    #        sorted(zip(meteo_station_IDs, meteo_station_lats, meteo_station_longs), \
    #               key=lambda triplet: geopy.distance.vincenty(coords_AQM, (triplet[1], triplet[2])).km)]
    
    #Matching meteo to PM2.5
    Processed_PM_dfs_concat_meteo = pd.merge(Processed_PM_dfs_concat, meteorology_raw_concat_no_NA_daily_avg_final,\
                                             left_index=True,right_index=True,how='inner').resample('1d').mean().dropna()
    '''
    for station_id in meteo_stations_closest_in_order[1:]:
        meteo_missing_index  = Processed_PM_dfs_concat_meteo[Processed_PM_dfs_concat_meteo.iloc[:,1:].isna().any(1)].index
        if len(meteo_missing_index)>0:
            #print(meteo_missing_index)
            Processed_PM_dfs_concat_meteo.loc[meteo_missing_index,kept_features] = \
            Processed_PM_dfs_concat_meteo.loc[meteo_missing_index,kept_features].fillna(LA_meteo[station_id].loc[meteo_missing_index,kept_features])
        else:
            break
    '''
    #Processed_PM_dfs_concat_meteo.dropna(inplace = True)
    return Processed_PM_dfs_concat_meteo

In [7]:
PM25_meteo_matched = {key:PM_Meteo_Matcher(PM25_dict[key]['PM25'], PM25_dict[key]['coordinate']) for key in PM25_dict.keys()}

In [8]:
PM25_meteo_matched

{'Dhaka':              PM25  TEMP  VISIB  WDSP    RH
 DATE                                      
 2017-01-01  249.0  68.5    1.1   0.8  70.0
 2017-01-02  234.0  70.2    1.4   1.1  69.0
 2017-01-03  165.0  70.0    1.5   3.5  69.0
 2017-01-04  152.0  69.2    1.9   6.1  64.0
 2017-01-05  224.0  66.9    0.9   5.6  81.0
 ...           ...   ...    ...   ...   ...
 2021-12-27  195.0  71.7    1.9   0.2  64.0
 2021-12-28  193.0  71.3    1.8   0.8  65.0
 2021-12-29   82.0  70.9    1.8   1.4  65.0
 2021-12-30  136.0  71.1    2.0   3.3  64.0
 2021-12-31  103.0  72.1    2.2   5.6  58.0
 
 [1677 rows x 5 columns]}

In [50]:
final_image_path = 'Documents/planet_imgs/Dhaka/20211220_050630_1061_3B_Visual_clip.tif'
station_index = final_image_path.split('/')[2]
time_index = final_image_path.split('/')[-1].split('_')[1]
time_index = time_index[0:2]+':'+time_index[2:4]+':' +time_index[4:]
id_index = final_image_path.split('/')[-1].split('_')[2]
id_index
date_index = final_image_path.split('/')[-1].split('_')[0]

date_index = date_index[0:4] + '-' + date_index[4:6] + '-' + date_index[6:]
# print(date_index[5:7])
date_index

'2021-12-20'

In [53]:
matching_PM25 = PM25_meteo_matched[station_index][PM25_meteo_matched[station_index].index == date_index].iloc[0,0]
matching_PM25

matching_meteo = PM25_meteo_matched[station_index][PM25_meteo_matched[station_index].index == date_index].iloc[0,1:]
matching_meteo

TEMP     67.8
VISIB     2.0
WDSP      5.5
RH       61.0
Name: 2021-12-20 00:00:00, dtype: float64

In [95]:
def Imagery_matcher(final_image_path):
    try:
        print('site loading')
        im = np.moveaxis(rasterio.open(final_image_path).read(), 0, 2)
        img_arr = np.array(im)
#         print('read in working')
    except:
        print(final_image_path+' cannot be loaded')
        return None
    
    black_space = np.mean(img_arr[:,:,:]/255)

#     if black_space > 70 or black_space <= 10:
#         return None
    if black_space > .91 or black_space <= 0.4:
#         print(black_space)
#         plt.imshow(im)
#         plt.show()
        return None
    else:
        
        station_index = final_image_path.split('/')[2]
        time_index = final_image_path.split('/')[-1].split('_')[1]
        time_index = time_index[0:2]+':'+time_index[2:4]+':' +time_index[4:]
        date_index = final_image_path.split('/')[-1].split('_')[0]
        date_index = date_index[0:4] + '-' + date_index[4:6] + '-' + date_index[6:]
        id_index = final_image_path.split('/')[-1].split('_')[2]
#         print('indexes working')
#         plt.imshow(im)
#         plt.show()

    try:
        matching_PM25 = PM25_meteo_matched[station_index][PM25_meteo_matched[station_index].index == date_index].iloc[0,0]
        print('matched pm working')
        
        matching_meteo = PM25_meteo_matched[station_index][PM25_meteo_matched[station_index].index == date_index].iloc[0,1:]
        print('matched met working')
        
    except Exception as e:
        print('above failed')
        print(e.args)
        return None

    return({'Image':im, 'PM':matching_PM25, 'Meteo':matching_meteo, 'Date':date_index, 'Time_stamp':time_index,
            'Station_index': station_index, 'Sat_ID':id_index})




In [96]:
def Image_loader(image_folder_path):
    my_current_image_path = image_folder_path
    print(my_current_image_path)
    Matching_data_for_a_single_station = [Imagery_matcher(join(my_current_image_path, f)) for f in sorted(listdir(my_current_image_path)) \
                            if isfile(join(my_current_image_path, f)) and '.DS_Store' not in str(f)\
                                         and Imagery_matcher(join(my_current_image_path, f)) is not None]
    return Matching_data_for_a_single_station

In [97]:
AQM_data_for_model = Image_loader('../Desktop/Dhaka') #+ site) for site in my_coordinates.SITE]

../Desktop/Dhaka
site loading
above failed
('single positional indexer is out-of-bounds',)
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm w

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm w

matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
above failed
('single positional indexer is out-of-bounds',)
site loading
above failed
('single positional indexer is out-of-bounds',)
site loading
above failed
('single positional indexer is out-of-bounds',)
site loading
above failed
('single positional indexer is out-of-bounds',)
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched m

matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
above failed
('single positional indexer is out-of-bounds',)
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
site loading
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site load

matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm w

matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
above failed
('single positional indexer is out-of-bounds',)
site loading
above failed
('single positional indexer is out-of-bounds',)
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
m

site loading
above failed
('single positional indexer is out-of-bounds',)
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched me

matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm w

site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading

matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm w

matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading

site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading
matched pm working
matched met working
site loading

In [85]:
# for i in range(len(AQM_data_for_model)):
#     h = len(AQM_data_for_model[i])
#     print(np.sum(h))

In [15]:
with open("Desktop/K_224_met_aod_1721.pkl", "wb") as fp:
    pkl.dump(AQM_data_for_model, fp)

In [98]:
AQM_data_for_model[0]

{'Image': array([[[108, 117, 110, 255],
         [104, 113, 107, 255],
         [106, 113, 108, 255],
         ...,
         [175, 167, 152, 255],
         [174, 166, 150, 255],
         [173, 165, 149, 255]],
 
        [[116, 126, 117, 255],
         [106, 115, 108, 255],
         [112, 118, 114, 255],
         ...,
         [178, 170, 155, 255],
         [177, 170, 154, 255],
         [172, 166, 150, 255]],
 
        [[119, 127, 118, 255],
         [108, 116, 109, 255],
         [108, 116, 112, 255],
         ...,
         [183, 175, 159, 255],
         [180, 172, 156, 255],
         [171, 165, 149, 255]],
 
        ...,
 
        [[180, 173, 162, 255],
         [202, 194, 180, 255],
         [198, 191, 173, 255],
         ...,
         [121, 125, 119, 255],
         [125, 125, 118, 255],
         [148, 145, 134, 255]],
 
        [[166, 160, 149, 255],
         [212, 202, 188, 255],
         [204, 196, 181, 255],
         ...,
         [122, 126, 118, 255],
         [115, 116, 105, 2

In [87]:
df = pd.DataFrame(AQM_data_for_model, columns=['Image', 'PM', 'Meteo','Date','Time_stamp','Station_index','Sat_ID'])

In [88]:
del df['Meteo']

In [89]:
df

Unnamed: 0,Image,PM,Date,Time_stamp,Station_index,Sat_ID
0,../Desktop/Dhaka/20170121_034828_0e19_3B_Visua...,260.0,2017-01-21,03:48:28,Dhaka,0e19
1,../Desktop/Dhaka/20170130_034930_0e20_3B_Visua...,123.0,2017-01-30,03:49:30,Dhaka,0e20
2,../Desktop/Dhaka/20170201_093853_0c43_3B_Visua...,411.0,2017-02-01,09:38:53,Dhaka,0c43
3,../Desktop/Dhaka/20170201_100304_0c54_3B_Visua...,411.0,2017-02-01,10:03:04,Dhaka,0c54
4,../Desktop/Dhaka/20170205_080243_0c46_3B_Visua...,132.0,2017-02-05,08:02:43,Dhaka,0c46
...,...,...,...,...,...,...
995,../Desktop/Dhaka/20211226_034336_2440_3B_Visua...,166.0,2021-12-26,03:43:36,Dhaka,2440
996,../Desktop/Dhaka/20211226_034338_2440_3B_Visua...,166.0,2021-12-26,03:43:38,Dhaka,2440
997,../Desktop/Dhaka/20211226_040738_1025_3B_Visua...,166.0,2021-12-26,04:07:38,Dhaka,1025
998,../Desktop/Dhaka/20211229_040616_103c_3B_Visua...,82.0,2021-12-29,04:06:16,Dhaka,103c


In [94]:
img = PIL.Image.open(df['Image'][0])
img = Image(pil2tensor(img, dtype=np.float32).div_(255))
# df['Image'][0]

NameError: name 'pil2tensor' is not defined

<!-- def image_pairing(image_path):
    try:

#         im = Image.open(image_path).convert('LA')
        im = np.moveaxis(rasterio.open(image_path).read(), 0, 2)
        img_arr = np.array(im)
        sat_id = path.split('/')[3].split('_')[2]
        date = image_path.split('/')[3].split('_')[0]
        date = date[0:4] + '-' + date[4:6] + '-' + date[6:]
        time = image_path.split('/')[3].split('_')[1]
        time = time[0:2]+':'+time[2:4]+':' +time[4:]

#         print(img_arr, sat_id, date,time)
    except:
        print(image_path+' path cannot be found')

    black_space = np.mean(img_arr[:,:,:]/255)
# #     print(black_space)
# #     plt.imshow(im[:,:,1])
# #     plt.show()

# #     if black_space >= 65:
# #         print(black_space)
# #         plt.imshow(im[:,:,1])
# #         plt.show()

    matching_PM25 = PM25_meteo_matched[station_index][PM25_meteo_matched[station_index].index == image_time_index].iloc[0,0]
    matching_meteo = PM25_meteo_matched[station_index][PM25_meteo_matched[station_index].index == image_time_index].iloc[0,1:]
    if date not in kalanki \
    or black_space <= 10 \
    or black_space >= 70:
        print('invalid ', date)
        print(black_space)
#         plt.imshow(im[:,:,0])
#         plt.show()

    
    else:
        print(black_space)
        matching_PM25 = kalanki[date]
        print('valid ', date)

  
        return {'Image': im,'PM': matching_PM25, 'Date': date, 'Time': time, 'Sat_id': sat_id, 'Station': 'Kalanki'} -->

<!-- #meteo and PM
df         =  pd.read_excel("Downloads/Kath_PM/usembassy.xls")
dfbudhani  =  pd.read_excel('Downloads/Kath_PM/budhanilkantha.xls')
dfamerican =  pd.read_excel('Downloads/Kath_PM/americanclub.xls')
dfthamel   =  pd.read_excel('Downloads/Kath_PM/thamel.xls')
dfkote     =  pd.read_excel('Downloads/Kath_PM/koteshwore.xls')
dfkalanki  =  pd.read_excel('Downloads/Kath_PM/kalanki.xls')
dfgyn      =  pd.read_excel('Downloads/Kath_PM/gyaneshwore.xls')


usembassy    = {}
americanclub = {}
budhani      = {}
thamel       = {}
kote         = {}
kalanki      = {}
gyn          = {}

for index, row in df.iterrows():
    usembassy[row["DATE"]] = row["PM25"]
    
for index,row in dfamerican.iterrows():
    americanclub[row['DATE']] = row['PM25']

for index, row in dfbudhani.iterrows():
    budhani[row['DATE']] = row['PM25']

for index, row in dfthamel.iterrows():
    thamel[row['DATE']] = row['PM25']
    
for index, row in dfkote.iterrows():
    kote[row['DATE']] = row['PM25']
    
for index, row in dfkalanki.iterrows():
    kalanki[row['DATE']] = row['PM25']
    
for index, row in dfgyn.iterrows():
    dfgyn['DATE'] = pd.to_datetime(dfgyn['DATE'])
    dfgyn['DATE'].dt.strftime('%Y-%m-%d')
    gyn[row['DATE']] = row['PM25'] -->

In [None]:
# dfgyn      =  pd.read_excel('Downloads/Kath_PM/gyaneshwore.xls', converters= {'DATE': pd.to_datetime})
# for index, row in dfgyn.iterrows():
# #     dfgyn['DATE'] = pd.to_datetime(dfgyn['DATE'])
# #     dfgyn['DATE'].dt.strftime('%Y-%m-%d')
#     gyn[row['DATE']] = row['PM25']
# gyn

In [9]:
# #CRITICAL. Discarding of all images that do not match the regular shape size
# import os

# directory = 'Documents/planet_imgs/thamel_imgs'

# for filename in sorted(os.listdir(directory)):
#     if filename.endswith(".tif"): 
#         img = cv2.imread(os.path.join(directory, filename))
# #         plt.imshow(img)
# #         plt.show()
#         h,w,_ = img.shape
# #         print(h,w)
#         if (not (h ==479 and w == 479)):

#             print(os.path.join(directory, filename))
#             print(h,w)
#             os.remove(os.path.join(directory,filename))

# #         else:
# #             continue

In [11]:
# pm25_dict['usembassy']

In [12]:
# # embassy_list = []
# # for path in tqdm(testdir, position=0, leave=True):
# mypath = testdir + path
# #     img = Image.open(mypath)
# img = cv2.imread(testdir)    
# img_arr = np.array(img)
# print(np.mean(img_arr[:,:,:])/255)

# plt.imshow(img[:,:,1])
# plt.show()

# print(img.shape)
# name = os.path.basename(mypath).split('_')[0]
# name = name[0:4] + '-' + name[4:6] + '-' + name[6:]
    
# #     if name in avg_PM_emb \
# #     and img_arr.shape == (479, 479, 3) \
# #     and np.mean(img_arr[:,:,:])/255 >= 0.03:
            
# #         embassy_list.append([img_arr, avg_PM_emb[name]])


<!-- directory = 'Documents/planet_imgs/gyaneshwore'

for filename in sorted(os.listdir(directory)):
    print(filename)
#     sat_id = filename.split('_')[2]
#     print(sat_id)
# date = path.split('/')[3].split('_')[0]
# date = date[0:4] + '-' + date[4:6] + '-' + date[6:]
# print(date)

# time = path.split('/')[3].split('_')[1]
# time = time[0:2]+':'+time[2:4]+':' +time[4:]
# print(time) -->

<!-- def image_pairing(image_path):
    
    try:
#         im = Image.open(image_path).convert('LA')
        im = np.moveaxis(rasterio.open(image_path).read(), 0, 2)
        img_arr = np.array(im)
        sat_id = path.split('/')[3].split('_')[2]
        date = image_path.split('/')[3].split('_')[0]
        date = date[0:4] + '-' + date[4:6] + '-' + date[6:]
        time = image_path.split('/')[3].split('_')[1]
        time = time[0:2]+':'+time[2:4]+':' +time[4:]

        print(img_arr, sat_id, date,time)
    except:
        print(image_path+' path cannot be found')

    black_space = np.mean(img_arr[:,:,:]/255)
# #     print(black_space)
    plt.imshow(im[:,:,1])
    plt.show()
#     print(sat_id)

# #     if black_space >= 65:
# #         print(black_space)
# #         plt.imshow(im[:,:,1])
# #         plt.show()
    if date not in gyn \
    or black_space <= 10 \
    or black_space >= 70:
        print('invalid ', date)
        print(black_space)
#         plt.imshow(im[:,:,0])
#         plt.show()

    
    else:
        print(black_space)
        matching_PM25 = gyn[date]
        print('valid ', date)

  
        return {'Image': im,'PM': matching_PM25, 'Date': date, 'Time': time, 'Sat_id': sat_id, 'Station': 'Gyn'} -->

13 <= club <= 70

10 <= USembassy <= 65

10 <= budhani <= 70

<!-- # def Image_loader(image_folder_path):
#     my_current_image_path = image_folder_path
#     Matching_data_for_a_single_station = [image_pairing(join(my_current_image_path,f)) for f in sorted(listdir(my_current_image_path)) \
#                                       if isfile(join(my_current_image_path, f)) and '.DS_Store' not in str(f)]

#     Matching_data_for_a_single_station = list(filter(None, Matching_data_for_a_single_station))
#     return Matching_data_for_a_single_station -->

<!-- # American_club = Image_loader('Documents/planet_imgs/americanclub')
# US_embassy = Image_loader('Documents/planet_imgs/usembassy')
# budhani = Image_loader('Documents/planet_imgs/bud_imgs')
# thamel = Image_loader('Documents/planet_imgs/thamel_imgs')
# kote = Image_loader('Documents/planet_imgs/kote_imgs')
# kalank = Image_loader('Documents/planet_imgs/kalanki_imgs')
gyna = Image_loader('Documents/planet_imgs/gyaneshwore/') -->