In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from os import listdir
import numpy as np
import math
from scipy.spatial import ConvexHull
from scipy.spatial.distance import cdist
from scipy.spatial import distance
from itertools import groupby

In [2]:
def calculateMass(data):
#      multiply by constant 917
    result = 0
    for d in data:
        result += (1/6)*math.pi*math.pow(d,3) * 917 * math.pow(10, 6)
#     return round(result, 3)
    return result

In [3]:
def calculateLength(data):
    """
    find the most distant point
    data = a nupmpy array containing the points
    """
    """first check if the x points are same or not
    if same we will use the naive techniques
    if not we will use the convex hull approach
    """
    xVals = [i[0] for i in data]
#     print(xVals)
    g = groupby(xVals)
    sameValues = next(g, True) and not next(g, False)
#     print(sameValues)
    
    if sameValues == False:
        # # Find a convex hull in O(N log N)
        hull = ConvexHull(data)

        # Extract the points forming the hull
        hullpoints = data[hull.vertices,:]
    
    elif sameValues == True:
        hullpoints = data       

#     print(hullpoints)
    
    # Naive way of finding the best pair in O(H^2) time if H is number of points on
    # hull
    hdist = cdist(hullpoints, hullpoints, metric='euclidean')

    # Get the farthest apart points
    bestpair = np.unravel_index(hdist.argmax(), hdist.shape)

    #Print them
    points = [hullpoints[bestpair[0]],hullpoints[bestpair[1]]]
#     print(points)

    # print the distance
#     dist = np.linalg.norm(points[0] - points[1])
    dst = distance.euclidean(points[0], points[1])
#     print(points[0], points[1], dst)
    return round(dst, 3)

In [5]:
columns = ["cluster", "nodeCluster", "temperature", "ice_particles", "ice_mass", "ice_length"]
folders = [f for f in listdir("data/withOutliers/")]
# folders = ['newData']
for folder in folders:
    onlyfiles = [f for f in listdir("data/withOutliers/{}".format(folder)) ]
    simulationData = {}
#     print(folder)
    for file in onlyfiles:
        path = "data/withOutliers/{}/{}".format(folder, file)
#         print(path)
        df = pd.read_csv(path)
#             calculating the diameter in micron (?)
#         df['new_d'] = df['d'] * math.pow(10, 6)
    
#         getting the temperature means
        means = round(df.groupby('cluster')['T'].mean(), 3)
#         print(df.groupby('cluster')['nodeCluster'])
#     getting the clusters
        clusters = means.index.tolist()
#         print(clusters)
#     getting the nodeCluster
        nodeClusters = sorted(df['nodeCluster'].value_counts().index.tolist())
#         print(nodeClusters)
#     storing the temperature
        temperature = means.tolist()
#     storing total particles
        particles = df.groupby('cluster')['cluster'].value_counts().tolist()

#     adding the total values for all data
        clusters.append("all")
        nodeClusters.append('all')
        temperature.append(round(df['T'].mean(), 3))
        particles.append(df.shape[0])

#         extracting the total volumes
#         volumes = []
        volumesExact = []
#         extracting the farthest distance
        distances = []
        for each in clusters:
#             print(each)
            if each == 'all':
#                 print(each)
#                 volumes.append(calculateVolume(df['new_d'].tolist()))
                volumesExact.append(calculateMass(df['d'].tolist()))
                distances.append(calculateLength(df[['Points:0', 'Points:1', 'Points:2']].to_numpy()))
            else:
#                 print(each)
#                 volumes.append(calculateVolume(df[df.cluster == each]['new_d'].tolist()))
                volumesExact.append(calculateMass(df[df.cluster == each]['d'].tolist()))
                distances.append(calculateLength(df[df.cluster == each][['Points:0', 'Points:1', 'Points:2']].to_numpy()))
        
        
        # print(means)
#         print(clusters)
#         print(temperature)
#         print(particles)
#         print(volumes)
#         print(distances)
#         print(volumesExact)
        for index, value in enumerate(nodeClusters):
            if value == 'all':
                pass
            else:
                simulationData[value] = {'temp': temperature[index], "mass": volumesExact[index], ''}
            

# output csv generation
#         extract = pd.DataFrame(np.column_stack([clusters, nodeClusters, temperature, particles, volumesExact, distances]),
#                       columns=columns)

#         extract.to_csv('output/withOutliers/{}/{}'.format(folder, file), index=False)
#         print("exported {} {} ".format(folder, file))


1 0
2 1
3 2
4 3
5 4
6 5
all 6
7 0
8 1
9 2
10 3
11 4
12 5
all 6
13 0
14 1
all 2
15 0
16 1
17 2
all 3
18 0
19 1
20 2
21 3
22 4
all 5
23 0
24 1
25 2
26 3
all 4
27 0
28 1
29 2
30 3
31 4
all 5
32 0
33 1
34 2
all 3
35 0
36 1
37 2
38 3
all 4
39 0
40 1
41 2
42 3
all 4
43 0
44 1
45 2
46 3
47 4
48 5
all 6
1 0
2 1
3 2
4 3
5 4
all 5
6 0
7 1
8 2
9 3
10 4
11 5
12 6
13 7
all 8
14 0
15 1
16 2
17 3
18 4
all 5
19 0
20 1
21 2
all 3
22 0
23 1
all 2
24 0
25 1
all 2
26 0
27 1
28 2
29 3
30 4
31 5
all 6
32 0
33 1
34 2
all 3
35 0
36 1
37 2
38 3
39 4
40 5
all 6
41 0
42 1
43 2
44 3
45 4
46 5
all 6
47 0
48 1
49 2
50 3
all 4
1 0
2 1
3 2
4 3
5 4
6 5
all 6
7 0
8 1
9 2
10 3
all 4
11 0
12 1
13 2
14 3
all 4
15 0
16 1
17 2
18 3
all 4
19 0
20 1
21 2
all 3
22 0
23 1
24 2
25 3
26 4
all 5
27 0
28 1
29 2
30 3
all 4
31 0
32 1
33 2
all 3
34 0
35 1
36 2
37 3
all 4
38 0
39 1
40 2
all 3
41 0
42 1
43 2
44 3
all 4
1 0
2 1
3 2
4 3
5 4
6 5
all 6
7 0
8 1
9 2
10 3
11 4
12 5
13 6
all 7
14 0
15 1
16 2
17 3
18 4
19 5
all 6
20 0
21 1
22 2


In [10]:
path = "data/withoutOutliers/{}/{}".format("contrails1", "0.2.csv")
df = pd.read_csv(path)
pts = df[['Points:0', 'Points:1', 'Points:2']].to_numpy()
hull = ConvexHull(p)

In [14]:

from mpl_toolkits.mplot3d import Axes3D


# 8 points defining the cube corners
# pts = np.array([[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0],
#                 [0, 0, 1], [1, 0, 1], [1, 1, 1], [0, 1, 1], ])

hull = ConvexHull(pts)

# fig = plt.figure()
# ax = fig.add_subplot(111, projection="3d")

# # Plot defining corner points
# ax.plot(pts.T[0], pts.T[1], pts.T[2], "ko")

# # 12 = 2 * 6 faces are the simplices (2 simplices per square face)
# for s in hull.simplices:
#     s = np.append(s, s[0])  # Here we cycle back to the first coordinate
#     ax.plot(pts[s, 0], pts[s, 1], pts[s, 2], "r-")

# # Make axis label
# for i in ["x", "y", "z"]:
#     eval("ax.set_{:s}label('{:s}')".format(i, i))

# plt.show()

print(hull)

<scipy.spatial.qhull.ConvexHull object at 0x0000026375062248>


In [7]:
x = rand(1,20);              # 20 random x values
y = rand(1,20);              # 20 random y values
hullPoints = convhull(x,y);  # Find the points defining the convex hull
patch(x(hullPoints),y(hullPoints),'r');  # Plot the convex hull in red
hold on;                     # Add to the existing plot
scatter(x,y);    

SyntaxError: invalid syntax (Temp/ipykernel_7936/3301890463.py, line 5)

In [8]:
[4.8062e+00 3.8889e-01 5.9698e-12] [ 5.29590000e+01 -1.19520000e+00 -4.06809985e-09] 48.17884897927824

SyntaxError: invalid syntax (Temp/ipykernel_7936/1113393829.py, line 1)

In [11]:
5.29590000e+01

52.959