-
Notifications
You must be signed in to change notification settings - Fork 1
/
myrequest.py
145 lines (112 loc) · 5.48 KB
/
myrequest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import requests as req
import pandas as pd
from datetime import datetime
from ast import literal_eval
import os
import xarray as xr
import numpy as np
import os
import gspread
from oauth2client.service_account import ServiceAccountCredentials
def getsheet():
json_keyfile = '/home/naomi/json/CMIP6-d0cb1df722d1.json'
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name(json_keyfile, scope)
gc = gspread.authorize(credentials)
sheet_name = "CMIP6 GCS Data Request (Responses)"
sh = gc.open(sheet_name)
wks = sh.worksheet("Form Responses 1")
data = wks.get_all_values()
headers = data.pop(0)
df = pd.DataFrame(data, columns=headers)
# For the 'good old days' when I could let everyone view the sheet
#KEY = '1SGTSK_h4xWX3gdgpeWeCpL_vhzf6tnGPmxetO1gOlQc'
#SHEET_ID = '1506911698'
#url = f'https://docs.google.com/spreadsheets/d/{KEY}/export?format=csv&id={KEY}&gid={SHEET_ID}'
#temp_file_name = 'csv/test_csv.csv'
#download = req.get(url)
#with open(temp_file_name, 'w', newline='\n') as temp_file:
# temp_file.writelines(download.text.replace('\r\n','\n'))
#df = pd.read_csv(temp_file_name, dtype='unicode',keep_default_na=False)
df['members'] = [s.replace(' ','').split(',') for s in df.member_ids.values]
df['experiments'] = [s.replace('*','').replace(' ','').split(',') for s in df.experiment_ids.values]
df['models'] = [s.replace('All Available','All').replace(' ','').split(',') for s in df.source_ids.values]
df['variables'] = [s.replace(' ','').split(',') for s in df['variable_ids (comma separated list)'].values]
df['table'] = [s.replace(' ','').split(':')[0] for s in df.table_id.values]
df['requester'] = df['Your name']
df['science'] = df['Science Question/Motivation']
df['comments'] = df['Questions and comments']
df = df.drop(['Your name', 'Science Question/Motivation',
'Have you verified the existence of the data you will request?',
'table_id', 'source_ids', 'experiment_ids','member_ids',
'variable_ids (comma separated list)', 'Questions and comments'],1)
return df
def requests(df_prior,rows=[],emails=[],tables=[]):
df = df_prior.copy()
if len(rows)+len(emails)+len(tables) == 0:
df = df[df['requester']=='nutter']
if len(rows) > 0:
df = df.iloc[rows]
if len(emails) > 0:
dk = []
for email in emails:
dk += [df[df['E-mail']==email]]
df = pd.concat(dk)
if len(tables) > 0:
dk = []
for table in tables:
print('table',table)
dk += [df[df['table']==table]]
df = pd.concat(dk)
df_req = df[df['requester']!='Test']
os.system("/bin/rm -f csv/request_new.csv")
df_all = getsheet()
# save and read back in order to look like df_prior
df_all.to_csv('csv/request_new.csv',index=False, encoding='latin1')
df_all = pd.read_csv('csv/request_new.csv', dtype='unicode',encoding='latin1')
df_new = df_all.merge(df_prior, how='left', indicator=True)
df_new = df_new[df_new['_merge']=='left_only'].drop('_merge',1)
df_new = pd.concat([df_req,df_new],sort=False)
# convert strings back to lists
for key in ['experiments','models','variables','members']:
df_new.loc[:,key] = df_new.loc[:,key].apply(literal_eval)
dtrouble = request_clean(df_new)
return df_new, dtrouble
def set_request_id():
return datetime.now().strftime('%Y%m%d-%H%M')
def request_clean(df):
df_exp = pd.read_csv('CVs/Experiments.csv')
experiments = list(df_exp.experiment_id.unique())
#experiments = list(pd.read_csv('csv/Experiments_tier1.csv').experiment_id.unique())
#experiments += list(pd.read_csv('csv/Experiments_tier2.csv').experiment_id.unique())
#experiments += list(pd.read_csv('csv/Experiments_tier3.csv').experiment_id.unique())
#experiments += list(pd.read_csv('csv/Experiments_tier4.csv').experiment_id.unique())
df_var = pd.read_csv('CVs/Variables.csv')
variables = list(df_var.variable_id.unique())
df_source = pd.read_csv('CVs/Models.csv')
sources = list(df_source.source_id.unique())
dtrouble = {}
for item,row in df.iterrows():
troubles = []
email = row['E-mail']
if type(email)==type(0.0):
continue
table_id = row['table']
experiment_ids = row['experiments']
for experiment_id in experiment_ids:
if not ((experiment_id in experiments)or(experiment_id=='All')):
troubles += [f"Warning: {experiment_id} is not a valid experiment_id"]
source_ids = row['models']
for source_id in source_ids:
if not ((source_id in sources)or(source_id=='All')):
troubles += [f"Warning: {source_id} is not a valid source_id"]
variable_ids = row['variables']
for variable_id in variable_ids:
if variable_id in variables:
tables = list(df_var[df_var.variable_id == variable_id].table_id.unique())
if not (table_id in tables):
troubles += [f"Warning: variable_id={variable_id} is not available in table_id={table_id}"]
if len(troubles)>=1:
print(email,item,troubles)
dtrouble[email+'_'+str(item)] = troubles
return dtrouble