In [1]:
import math
import os
from itertools import groupby
from os import listdir

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.spatial import ConvexHull
from scipy.spatial import QhullError
from scipy.spatial import distance
from scipy.spatial.distance import cdist

%matplotlib inline

In [2]:
def calculateVolume(data):
    """
    Sum the volumes of spheres given their diameters.

    Each diameter ``d`` in ``data`` contributes ``(1/6) * pi * d**3``; the
    contributions are added in iteration order and the total is rounded to
    three decimal places.

    data = an iterable of numeric diameters
    """
    # Same factor the per-sphere formula uses, evaluated once up front.
    sphere_factor = (1/6)*math.pi
    total = 0
    for diameter in data:
        total = total + sphere_factor*math.pow(diameter, 3)
    return round(total, 3)

In [3]:
def calculateLength(data):
    """
    Return the largest Euclidean distance between any two points.

    The farthest pair of a point set always lies on its convex hull, so the
    pairwise search is restricted to the hull vertices whenever a hull can be
    built. Qhull cannot build a hull for degenerate input (too few points, or
    points that are all collinear/coplanar, e.g. sharing one x value); in
    that case every point is compared with every other point instead.

    Parameters
    ----------
    data : array-like of shape (n_points, n_dims)
        Point coordinates, one point per row.

    Returns
    -------
    float
        The maximum pairwise distance rounded to 3 decimals, or 0.0 when
        fewer than two points are supplied.
    """
    points = np.asarray(data, dtype=float)
    if len(points) < 2:
        # Zero or one point: there is no pair to measure.
        return 0.0

    n_points, n_dims = points.shape

    # Default to the naive search over all points; narrow it to the hull
    # vertices only when Qhull can actually construct a hull.
    hullpoints = points
    if n_dims >= 2 and n_points > n_dims + 1:
        try:
            # Find a convex hull in O(N log N)
            hull = ConvexHull(points)
            # Extract the points forming the hull
            hullpoints = points[hull.vertices, :]
        except QhullError:
            # Flat (collinear/coplanar) point set: keep every point.
            hullpoints = points

    # Naive way of finding the best pair in O(H^2) time if H is the number of
    # candidate points
    hdist = cdist(hullpoints, hullpoints, metric='euclidean')

    # Get the farthest apart points
    first, second = np.unravel_index(hdist.argmax(), hdist.shape)

    dst = distance.euclidean(hullpoints[first], hullpoints[second])
    return round(dst, 3)

In [4]:
# For every CSV under data/<folder>/, write a per-cluster summary (mean
# temperature, particle count, total volume, largest extent) plus an "all" row
# covering the whole file to output/<folder>/<same file name>.
columns = ["cluster", "temperature", "particles", "volumes", "distance"]
point_columns = ['Points:0', 'Points:1', 'Points:2']

# listdir returns entries in arbitrary order and includes stray files; keep
# only sub-directories and sort them so runs are deterministic.
folders = sorted(f for f in listdir("data/") if os.path.isdir("data/{}".format(f)))
for folder in folders:
    # Skip anything that is not a CSV file (e.g. .ipynb_checkpoints, .DS_Store).
    onlyfiles = sorted(
        f for f in listdir("data/{}".format(folder))
        if f.lower().endswith(".csv")
        and os.path.isfile("data/{}/{}".format(folder, f))
    )
    for file in onlyfiles:
        path = "data/{}/{}".format(folder, file)
        df = pd.read_csv(path)
        # scale the diameter by 1e6 (presumably metres -> microns; confirm units)
        df['new_d'] = df['d'] * math.pow(10, 6)

        # mean temperature per cluster; the index holds the cluster labels
        means = round(df.groupby('cluster')['T'].mean(), 3)
        clusters = means.index.tolist()
        temperature = means.tolist()
        # number of particles (rows) per cluster, in the same cluster order
        particles = df.groupby('cluster').size().tolist()

        # total volume and farthest point-to-point distance per cluster
        volumes = []
        distances = []
        for each in clusters:
            members = df[df.cluster == each]
            volumes.append(calculateVolume(members['new_d'].tolist()))
            distances.append(calculateLength(members[point_columns].to_numpy()))

        # summary row over every particle in the file
        clusters.append("all")
        temperature.append(round(df['T'].mean(), 3))
        particles.append(df.shape[0])
        volumes.append(calculateVolume(df['new_d'].tolist()))
        distances.append(calculateLength(df[point_columns].to_numpy()))

        print(clusters)

        # output csv generation
        extract = pd.DataFrame(np.column_stack([clusters, temperature, particles, volumes, distances]),
                               columns=columns)

        # to_csv does not create missing directories, so make sure it exists
        os.makedirs('output/{}'.format(folder), exist_ok=True)
        extract.to_csv('output/{}/{}'.format(folder, file), index=False)
        print("exported {} {} ".format(folder, file))


[1, 2, 3, 4, 5, 'all']
exported contrails1 0.1.csv 
[1, 2, 3, 4, 5, 'all']
exported contrails1 0.11.csv 
[1, 'all']
exported contrails1 0.12.csv 
[1, 2, 'all']
exported contrails1 0.13.csv 
[1, 2, 3, 4, 'all']
exported contrails1 0.14.csv 
[1, 2, 3, 'all']
exported contrails1 0.15.csv 
[1, 2, 3, 4, 'all']
exported contrails1 0.16.csv 
[1, 2, 'all']
exported contrails1 0.17.csv 
[1, 2, 3, 'all']
exported contrails1 0.18.csv 
[1, 2, 3, 'all']
exported contrails1 0.19.csv 
[1, 2, 3, 4, 5, 'all']
exported contrails1 0.2.csv 
[1, 2, 3, 4, 'all']
exported contrails2 0.1.csv 
[1, 2, 3, 4, 5, 6, 7, 'all']
exported contrails2 0.11.csv 
[1, 2, 3, 4, 'all']
exported contrails2 0.12.csv 
[1, 2, 'all']
exported contrails2 0.13.csv 
[1, 'all']
exported contrails2 0.14.csv 
[1, 'all']
exported contrails2 0.15.csv 
[1, 2, 3, 4, 5, 'all']
exported contrails2 0.16.csv 
[1, 2, 'all']
exported contrails2 0.17.csv 
[1, 2, 3, 4, 5, 'all']
exported contrails2 0.18.csv 
[1, 2, 3, 4, 5, 'all']
exported contrai