### Identify events which appear twice and delete them

In [2]:
import os
import hashlib
import pandas as pd
import numpy as np
import shutil

In [3]:
def create_copy_of_files(em, gauge_num):
    target_directory = f"/nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/{em}/{gauge_num}/EventSet"

    # Create the directory if it does not exist
    os.makedirs(target_directory, exist_ok=True)

    # Get the current working directory
    current_directory = f"/nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/{em}/{gauge_num}/"

    # List all files in the current directory
    files = [f for f in os.listdir(current_directory) if os.path.isfile(os.path.join(current_directory, f))]

    # Copy each file to the target directory
    for file in files:
        source_path = os.path.join(current_directory, file)
        destination_path = os.path.join(target_directory, file)
        shutil.copy(source_path, destination_path)

    print(f"All files have been copied to {target_directory}.")

def load_csv_files(directory):
    csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]
    dataframes = {}
    
    for csv_file in csv_files:
        file_path = os.path.join(directory, csv_file)
        df = pd.read_csv(file_path)
        if 'precipitation (mm)' in df.columns:
            dataframes[csv_file] = df['precipitation (mm)']
    
    return dataframes

def compare_and_delete_duplicates(dataframes):
    filenames = list(dataframes.keys())
    num_files = len(filenames)
    duplicates = set()

    for i in range(num_files):
        for j in range(i + 1, num_files):
            file1, file2 = filenames[i], filenames[j]
            if dataframes[file1].equals(dataframes[file2]):
                duplicates.add(file2)  # Add the second file to the set of duplicates
                # print(f"The 'rolling_sum' column in {file1} is the same as in {file2}. Deleting {file2}.")

    # Delete the duplicate files
    for file in duplicates:
        os.remove(os.path.join(directory_path, file))
        # print(f"Deleted file: {file}")

def compare_columns(dataframes):
    filenames = list(dataframes.keys())
    num_files = len(filenames)
    
    for i in range(num_files):
        for j in range(i + 1, num_files):
            file1, file2 = filenames[i], filenames[j]
            if dataframes[file1].equals(dataframes[file2]):
                print(f"The 'rolling_sum' column in {file1} is the same as in {file2}")       

### UKCP18 (done for bc005, started for bc009 but not many profiles actually made yet)

In [13]:
em='bc009'
for gauge_num in range(0,10):
    if gauge_num not in [444, 827, 888]:
        create_copy_of_files(em, gauge_num)
        print(f"Gauge number {gauge_num}")
        directory_path = f"/nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/{em}/{gauge_num}/"
        # Move files to extra directoy incase deleting part is wrong
        dataframes = load_csv_files(directory_path)
        print(len(dataframes))
        # Compare the 'rolling_sum' columns
        compare_and_delete_duplicates(dataframes)
        dataframes = load_csv_files(directory_path)
        print(len(dataframes))

All files have been copied to /nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/bc009/0/EventSet.
Gauge number 0
94
94
All files have been copied to /nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/bc009/1/EventSet.
Gauge number 1
100
100
All files have been copied to /nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/bc009/2/EventSet.
Gauge number 2
91
91
All files have been copied to /nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/bc009/3/EventSet.
Gauge number 3
95
95
All files have been copied to /nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/bc009/4/EventSet.
Gauge number 4
107
107
All files have been copied to /nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/bc009/5/EventSet.
Gauge number 5
105
105
All files have been copied to /nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/bc009/6/EventSet.
Gauge number 6
114
114
All files have been copied to /nf

### Native NIMROD (Done)

In [3]:
for gauge_num in range(0,1295):
    if gauge_num not in [444, 827, 888]:
        print(f"Gauge number {gauge_num}")
        directory_path = f"/nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/NIMROD/NIMROD_1km_filtered_100/{gauge_num}/"
        # Move files to extra directoy incase deleting part is wrong
        copy_files(directory_path)
        dataframes = load_csv_files(directory_path)
        print(len(dataframes))
        # Compare the 'rolling_sum' columns
        compare_and_delete_duplicates(dataframes)
        dataframes = load_csv_files(directory_path)
        print(len(dataframes))

Gauge number 0
78
78
Gauge number 1
83
83
Gauge number 2
84
84
Gauge number 3
66
66
Gauge number 4
80
80
Gauge number 5
91
91
Gauge number 6
82
82
Gauge number 7
81
81
Gauge number 8
76
76
Gauge number 9
80
80
Gauge number 10
84
84
Gauge number 11
78
78
Gauge number 12
78
78
Gauge number 13
84
84
Gauge number 14
70
70
Gauge number 15
78
78
Gauge number 16
80
80
Gauge number 17
76
76
Gauge number 18
81
81
Gauge number 19
74
74
Gauge number 20
81
81
Gauge number 21
84
84
Gauge number 22
87
87
Gauge number 23
80
80
Gauge number 24
75
75
Gauge number 25
84
84
Gauge number 26
70
70
Gauge number 27
74
74
Gauge number 28
79
79
Gauge number 29
70
70
Gauge number 30
84
84
Gauge number 31
90
90
Gauge number 32
82
82
Gauge number 33
76
76
Gauge number 34
83
83
Gauge number 35
73
73
Gauge number 36
91
91
Gauge number 37
87
87
Gauge number 38
85
85
Gauge number 39
84
84
Gauge number 40
86
86
Gauge number 41
81
81
Gauge number 42
78
78
Gauge number 43
64
64
Gauge number 44
80
80
Gauge number 45
73
7

79
79
Gauge number 362
85
85
Gauge number 363
84
84
Gauge number 364
92
92
Gauge number 365
83
83
Gauge number 366
80
80
Gauge number 367
81
81
Gauge number 368
89
89
Gauge number 369
86
86
Gauge number 370
79
79
Gauge number 371
81
81
Gauge number 372
74
74
Gauge number 373
82
82
Gauge number 374
92
92
Gauge number 375
80
80
Gauge number 376
75
75
Gauge number 377
75
75
Gauge number 378
81
81
Gauge number 379
81
81
Gauge number 380
79
79
Gauge number 381
69
69
Gauge number 382
87
87
Gauge number 383
93
89
Gauge number 384
97
79
Gauge number 385
103
92
Gauge number 386
81
60
Gauge number 387
92
82
Gauge number 388
96
75
Gauge number 389
89
74
Gauge number 390
88
74
Gauge number 391
90
79
Gauge number 392
93
76
Gauge number 393
93
78
Gauge number 394
98
84
Gauge number 395
89
74
Gauge number 396
91
75
Gauge number 397
86
72
Gauge number 398
86
72
Gauge number 399
93
79
Gauge number 400
105
75
Gauge number 401
108
71
Gauge number 402
105
75
Gauge number 403
108
71
Gauge number 404
107
72

80
80
Gauge number 717
73
73
Gauge number 718
71
71
Gauge number 719
72
72
Gauge number 720
71
71
Gauge number 721
69
69
Gauge number 722
76
76
Gauge number 723
73
73
Gauge number 724
74
74
Gauge number 725
74
74
Gauge number 726
69
69
Gauge number 727
77
77
Gauge number 728
70
70
Gauge number 729
72
72
Gauge number 730
73
73
Gauge number 731
67
67
Gauge number 732
61
61
Gauge number 733
77
77
Gauge number 734
72
72
Gauge number 735
66
66
Gauge number 736
76
76
Gauge number 737
77
77
Gauge number 738
73
73
Gauge number 739
72
72
Gauge number 740
60
60
Gauge number 741
63
63
Gauge number 742
75
75
Gauge number 743
70
70
Gauge number 744
65
65
Gauge number 745
81
81
Gauge number 746
71
71
Gauge number 747
70
70
Gauge number 748
78
78
Gauge number 749
73
73
Gauge number 750
83
83
Gauge number 751
77
77
Gauge number 752
86
86
Gauge number 753
79
79
Gauge number 754
84
84
Gauge number 755
73
73
Gauge number 756
79
79
Gauge number 757
83
83
Gauge number 758
76
76
Gauge number 759
84
84
Gauge

67
Gauge number 1072
81
81
Gauge number 1073
71
71
Gauge number 1074
68
68
Gauge number 1075
76
76
Gauge number 1076
68
68
Gauge number 1077
60
60
Gauge number 1078
57
57
Gauge number 1079
73
73
Gauge number 1080
66
66
Gauge number 1081
85
85
Gauge number 1082
76
76
Gauge number 1083
81
81
Gauge number 1084
71
71
Gauge number 1085
83
83
Gauge number 1086
76
76
Gauge number 1087
75
75
Gauge number 1088
79
79
Gauge number 1089
67
67
Gauge number 1090
79
79
Gauge number 1091
67
67
Gauge number 1092
76
76
Gauge number 1093
75
75
Gauge number 1094
78
78
Gauge number 1095
88
88
Gauge number 1096
78
78
Gauge number 1097
86
86
Gauge number 1098
84
84
Gauge number 1099
68
68
Gauge number 1100
85
85
Gauge number 1101
76
76
Gauge number 1102
79
79
Gauge number 1103
81
81
Gauge number 1104
74
74
Gauge number 1105
77
77
Gauge number 1106
78
78
Gauge number 1107
71
71
Gauge number 1108
73
73
Gauge number 1109
80
80
Gauge number 1110
80
80
Gauge number 1111
67
67
Gauge number 1112
84
84
Gauge number 

FileNotFoundError: [Errno 2] No such file or directory: '/nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/NIMROD/NIMROD_1km_filtered_100/1294/'

### Regridded NIMROD (Done)

In [94]:
for gauge_num in range(888,1294):
    if gauge_num not in [444, 827, 888]:
        print(f"Gauge number {gauge_num}")
        directory_path = f"/nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/NIMROD/NIMROD_2.2km_unfiltered/{gauge_num}/"
        # Move files to extra directoy incase deleting part is wrong
        copy_files(directory_path)
        dataframes = load_csv_files(directory_path)
        print(len(dataframes))
        # Compare the 'rolling_sum' columns
        compare_and_delete_duplicates(dataframes)
        dataframes = load_csv_files(directory_path)
        print(len(dataframes))

Gauge number 889
109
79
Gauge number 890
109
81
Gauge number 891
108
77
Gauge number 892
105
72
Gauge number 893
111
84
Gauge number 894
112
88
Gauge number 895
111
80
Gauge number 896
112
87
Gauge number 897
105
59
Gauge number 898
107
54
Gauge number 899
107
55
Gauge number 900
105
57
Gauge number 901
108
67
Gauge number 902
107
63
Gauge number 903
109
65
Gauge number 904
106
70
Gauge number 905
107
72
Gauge number 906
108
67
Gauge number 907
109
74
Gauge number 908
107
74
Gauge number 909
108
71
Gauge number 910
108
64
Gauge number 911
107
64
Gauge number 912
109
77
Gauge number 913
109
78
Gauge number 914
109
72
Gauge number 915
111
75
Gauge number 916
112
77
Gauge number 917
108
69
Gauge number 918
110
71
Gauge number 919
109
76
Gauge number 920
110
72
Gauge number 921
112
76
Gauge number 922
112
76
Gauge number 923
116
90
Gauge number 924
105
67
Gauge number 925
107
71
Gauge number 926
109
78
Gauge number 927
111
75
Gauge number 928
113
85
Gauge number 929
112
81
Gauge number 930

107
70
Gauge number 1222
109
67
Gauge number 1223
113
79
Gauge number 1224
111
78
Gauge number 1225
114
80
Gauge number 1226
108
69
Gauge number 1227
110
78
Gauge number 1228
112
82
Gauge number 1229
113
77
Gauge number 1230
109
75
Gauge number 1231
112
84
Gauge number 1232
114
89
Gauge number 1233
112
76
Gauge number 1234
112
79
Gauge number 1235
108
69
Gauge number 1236
109
78
Gauge number 1237
108
74
Gauge number 1238
109
72
Gauge number 1239
115
72
Gauge number 1240
108
70
Gauge number 1241
109
79
Gauge number 1242
109
74
Gauge number 1243
109
73
Gauge number 1244
107
71
Gauge number 1245
109
78
Gauge number 1246
107
65
Gauge number 1247
109
59
Gauge number 1248
117
85
Gauge number 1249
113
81
Gauge number 1250
116
79
Gauge number 1251
111
73
Gauge number 1252
113
82
Gauge number 1253
106
77
Gauge number 1254
111
62
Gauge number 1255
110
76
Gauge number 1256
110
75
Gauge number 1257
106
72
Gauge number 1258
116
91
Gauge number 1259
114
89
Gauge number 1260
116
88
Gauge number 1261


FileNotFoundError: [Errno 2] No such file or directory: '/nfs/a319/gy17m2a/PhD/ProcessedData/IndependentEvents/NIMROD/NIMROD_2.2km_unfiltered/1294/'