In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statistics

In [8]:
# Read the raw borehole temperature log; the first CSV column is used as the
# row index rather than as a data column.
BoreholeData = pd.read_csv(
    "./NorthAmericanBoreholes.csv",
    index_col=0,
)
BoreholeData.head()

Unnamed: 0,Borehole,Depth,Temperature,Longitude,Latitudes,link,Data Contact,Country
0,CA-0001,29.78,4.44,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
1,CA-0001,39.67,4.44,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
2,CA-0001,49.56,4.49,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
3,CA-0001,59.39,4.55,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
4,CA-0001,69.22,4.62,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA


In [19]:
# Shallow readings are influenced by weather at the surface, so keep only the
# rows whose Depth is strictly greater than 50 (in the units stored in the CSV).
DataFilter1 = BoreholeData.loc[BoreholeData["Depth"].gt(50)]
DataFilter1.head(20)

Unnamed: 0,Borehole,Depth,Temperature,Longitude,Latitudes,link,Data Contact,Country
3,CA-0001,59.39,4.55,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
4,CA-0001,69.22,4.62,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
5,CA-0001,79.02,4.68,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
6,CA-0001,88.82,4.74,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
7,CA-0001,98.54,4.8,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
8,CA-0001,108.27,4.86,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
9,CA-0001,117.9,4.92,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
10,CA-0001,127.44,4.98,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
11,CA-0001,136.98,5.05,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
12,CA-0001,146.46,5.19,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA


In [72]:
# Regression function

def regress(data, yvar, xvars):
    """Fit an ordinary-least-squares model with an intercept and return its coefficients.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the response and predictor columns. It is not modified.
    yvar : str
        Name of the response column.
    xvars : str or sequence of str
        Name(s) of the predictor column(s). A single name may be passed as a
        plain string.

    Returns
    -------
    The ``params`` attribute of the fitted ``sm.OLS`` result: one coefficient
    per predictor plus one for the constant column named ``'Intercept'``.
    """
    # Normalise to a list so that selecting from `data` always yields a
    # DataFrame, even when a single column name is given as a string.
    predictors = [xvars] if isinstance(xvars, str) else list(xvars)

    Y = data[yvar]
    # `assign` builds a new frame with the constant column instead of writing
    # into a column subset of `data`, which would trigger pandas'
    # SettingWithCopyWarning on every call.
    X = data[predictors].assign(Intercept=1.)

    result = sm.OLS(Y, X).fit()
    return result.params

In [73]:
# Fit Temperature against Depth separately for each borehole; every group's
# fitted coefficients become one row of the result, indexed by Borehole.
DataFilter2 = DataFilter1.groupby("Borehole").apply(
    regress,
    yvar="Temperature",
    xvars=["Depth"],
)

In [74]:
# Convert units: scale the fitted Depth slope by a factor of 1000
# (presumably per-metre -> per-kilometre; confirm against the Depth units).
# NOTE: this overwrites the column it reads from, so running the cell twice
# applies the factor twice.
DataFilter2["Depth"] = DataFilter2["Depth"].mul(1000)

In [75]:
# Order boreholes by the fitted slope (ascending), then give that slope column
# its descriptive name.
DataFilter2 = (
    DataFilter2
    .sort_values("Depth")
    .rename(columns={'Depth': 'Geothermal Gradient'})
)
DataFilter2

Unnamed: 0_level_0,Geothermal Gradient,Intercept
Borehole,Unnamed: 1_level_1,Unnamed: 2_level_1
CA-9013,1.612110,5.653582
US-NY7-78,2.930193,10.457502
CA-9001,4.788909,5.744754
CA-9212,5.580164,3.881823
US-NY6-14,5.669165,8.250001
...,...,...
US-CO5-23,55.688744,8.232881
CA-074-0,56.643865,7.700499
US-MT2-14,74.904574,7.392564
US-KEN95,83.888765,10.520182


In [105]:
# Collapse the per-reading rows to one metadata row per borehole.
def _most_common(values):
    """Return the most frequent non-null entry of a Series, or NaN if it has none."""
    modes = values.mode()
    # Series.mode returns every tied value (sorted); keep the first so each
    # borehole ends up with exactly one value per column.
    return modes.iloc[0] if not modes.empty else np.nan


# Depth and Temperature vary per reading and are summarised by the regression
# instead, so they are dropped here. Aggregating column-by-column (rather than
# applying DataFrame.mode to each group) guarantees a single row per borehole
# even when a column has tied modes, and keeps Borehole as the index without
# relying on the grouping column being passed through `apply`.
DataFilter3 = (
    DataFilter1
    .drop(columns=["Depth", "Temperature"])
    .groupby("Borehole")
    .agg(_most_common)
    .rename(columns={'Latitudes': 'Latitude'})
)
DataFilter3

Unnamed: 0_level_0,Longitude,Latitude,link,Data Contact,Country
Borehole,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CA-0001,-93.94,51.13,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
CA-0002,-93.86,51.18,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
CA-0003,-93.17,50.96,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
CA-0004,-93.14,50.99,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
CA-0005,-92.89,51.12,http://geothermal.earth.lsa.umich.edu/DATA/CA-...,J-C Mareschal,CA
...,...,...,...,...,...
US-nos92,-72.55,43.33,http://geothermal.earth.lsa.umich.edu/DATA/US-...,E.R. Decker,US
US-sar64,-74.27,44.33,http://geothermal.earth.lsa.umich.edu/DATA/US-...,H.N. Pollack,US
US-sar92,-74.27,44.33,http://geothermal.earth.lsa.umich.edu/DATA/US-...,E.R. Decker,US
US-wad64,-73.47,44.23,http://geothermal.earth.lsa.umich.edu/DATA/US-...,H.N. Pollack,US


In [108]:
# Attach the regression results to the borehole metadata, matching rows on the
# shared Borehole index and keeping every row of the metadata table.
Data = DataFilter3.join(DataFilter2, how="left")

In [109]:
# Write the combined per-borehole summary (index included) to the working directory.
Data.to_csv(path_or_buf='BoreholeNASummarized.csv')