In [67]:
import pandas as pd
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt

In [68]:
### Necessary capabilities:
#
# 1. Curve fitting with scipy and extracting the Zipf's Law exponent along with R^2
# 
# 2. Automate dataset creation with Ovito. This means automatically plugging in coordinate data, varying cutoff distance,
#    and extracting data on cluster size and rank

In [69]:
# Finding the R^2 value of our curve fitting prediction
# Found here: https://stackoverflow.com/questions/19189362/getting-the-r-squared-value-using-curve-fit

def findR2(fun, x, y, popt):
    """Return the coefficient of determination (R^2) of a fitted model.

    Parameters
    ----------
    fun : callable
        Model function, called as ``fun(x, *popt)``.
    x : array-like
        Independent variable values passed to ``fun``.
    y : array-like
        Observed values; must support element-wise subtraction.
    popt : sequence
        Fitted parameters, unpacked into ``fun`` after ``x``.

    Returns
    -------
    float
        ``1 - SS_res / SS_tot``. There is no guard for ``SS_tot == 0``
        (i.e. a constant ``y``).
    """
    predicted = fun(x, *popt)
    errors = y - predicted
    deviations = y - np.mean(y)

    # Residual sum of squares vs. total sum of squares around the mean.
    ss_res = np.sum(errors**2)
    ss_tot = np.sum(deviations**2)

    unexplained_fraction = ss_res/ss_tot
    return 1 - unexplained_fraction

# Model function passed to curve_fit: a power law, y = a * x**b

def expFunc(x, a, b):
    """Evaluate the power law ``a * x**b``.

    Despite the name, this is a power-law model (``x`` raised to ``b``),
    not an exponential in ``x``.

    Parameters
    ----------
    x : scalar or array-like
        Base value(s).
    a : scalar
        Multiplicative coefficient.
    b : scalar
        Exponent applied to ``x``.
    """
    powered = x ** b
    return a * powered

In [84]:
# Declare file path and convention here.
# Each input file is read from: s1 + zip_code + '/' + zip_code + '_' + <cutoff> + '.txt'
# NOTE(review): s1 is an absolute, machine-specific path; point it at your own
# data directory before running this notebook elsewhere.

zip_code = '15090'

s1 = 'E:/Old Downloads Folder/Research Stuff/Jupyter Notebooks/Coords Folder/'

s2 = '/' + zip_code + '_'

# Declare your Ovito step size and max range here.

step = 0.01

final = 1

# Number of decimals used when formatting the cutoff into the file name.
# It must be able to represent `step` exactly, otherwise the cutoff would
# round to the same value on every iteration.

decimals = 2

if step <= 0 or round(step, decimals) != step:
    raise ValueError(
        'step must be positive and have at most {} decimal places'.format(decimals)
    )

# Column layout of the results table

column_names = ['Ovito Range', 'coeff', 'exp', 'R2']

# One row per cutoff; collected in a list and turned into a DataFrame once
# at the end (DataFrame.append was removed in pandas 2.0).

records = []

# Count steps with an integer so floating-point drift cannot add or drop an
# iteration; the small epsilon absorbs ratios like 0.3 / 0.1 = 2.999...

n_steps = int(final / step + 1e-9)

for k in range(1, n_steps + 1):

    cutoff = round(k * step, decimals)

    openpath = s1 + zip_code + s2 + str(cutoff) + '.txt'

    # The first line of each file is skipped; the remaining values are
    # loaded into a single 'Size' column.
    data = pd.read_csv(openpath, sep = ',', names = ['Size'], skiprows = [0])

    # Turn the 0-based row position into a 1-based 'index' column used as
    # the rank. NOTE(review): assumes rows are already ordered by rank --
    # confirm against the Ovito export.
    data.index = data.index + 1

    data = data.reset_index()

    xdata = data['index']

    ydata = data['Size']

    # Fit Size = coeff * rank**exp
    variables, covariance = curve_fit(expFunc, xdata, ydata)

    records.append([cutoff, variables[0], variables[1], findR2(expFunc, xdata, ydata, variables)])

final_frame = pd.DataFrame(records, columns = column_names)

final_frame



Unnamed: 0,Ovito Range,coeff,exp,R2
0,0.01,1.255451,-0.027496,0.080336
1,0.02,9.447050,-0.278421,0.575902
2,0.03,16.882448,-0.354101,0.756342
3,0.04,33.845911,-0.443623,0.901409
4,0.05,84.027416,-0.570955,0.880108
...,...,...,...,...
95,0.96,9376.999659,-8.710326,0.999999
96,0.97,9376.999659,-8.710326,0.999999
97,0.98,9376.999659,-8.710326,0.999999
98,0.99,9376.999659,-8.710326,0.999999
