# Mining Frequent Patterns from Air Pollution Data

In [None]:
from PAMI.extras.DF2DB import denseDF2DB as pro
from PAMI.frequentPattern.basic import FPGrowth as alg
import plotly.express as px
import pandas as pd

class MFP:
    """Mine frequent patterns from air-pollution data and map the longest one.

    Creating an instance runs the whole pipeline in order: load the CSV,
    write a transactional database, mine it with FPGrowth, and plot the
    coordinates of the longest mined pattern on a map.

    The stages communicate through files in the current working directory
    (see ``transactionFile`` and ``patternFile``).
    """

    # Intermediate files shared by the pipeline stages.
    transactionFile = 'PM24HeavyPollutionRecordingSensors.csv'
    patternFile = 'frequentPatterns.txt'
    # Threshold handed to denseDF2DB together with the '>=' condition.
    thresholdValue = 15
    # Minimum support handed to FPGrowth.
    minSup = 200

    def __init__(self, fileName):
        """Load *fileName* and run processing, mining and visualization.

        Args:
            fileName: CSV file (path or file-like object accepted by
                ``pandas.read_csv``) that contains a ``timestamp`` column.

        Raises:
            KeyError: If the CSV has no ``timestamp`` column.
        """
        self.data = self._loadData(fileName)
        self.dataProcessing()
        self.knowledgeDiscovery()
        self.visualization()

    @staticmethod
    def _loadData(fileName):
        """Read the CSV, zero-fill gaps and replace timestamps by row ids.

        Returns:
            A DataFrame in which missing values are 0 and the ``timestamp``
            column has been renamed to ``tid`` and holds 1..n.
        """
        df = pd.read_csv(fileName).fillna(0)
        if 'timestamp' not in df.columns:
            raise KeyError('timestamp')
        # NOTE(review): no upper-bound filtering of readings happens here.
        # Earlier code carried a `< 100` mask keyed on the timestamp column
        # that never excluded rows with string timestamps. If readings
        # >= 100 are meant to be zeroed, add that explicitly -- confirm.
        df = df.rename(columns={'timestamp': 'tid'})
        # Transaction ids are simply the 1-based row numbers.
        df['tid'] = range(1, len(df) + 1)
        return df

    def dataProcessing(self):
        """Write ``self.data`` as a transactional database file."""
        db = pro.denseDF2DB(
            inputDF=self.data,
            thresholdValue=self.thresholdValue,
            condition='>=',
        )
        db.createTransactional(outputFile=self.transactionFile)

    def knowledgeDiscovery(self):
        """Run FPGrowth on the transactional file and save the patterns."""
        obj = alg.FPGrowth(self.transactionFile, self.minSup)
        obj.startMine()
        obj.save(self.patternFile)

    @staticmethod
    def _longestPatternCoordinates(lines):
        """Extract the coordinates of the pattern with the most points.

        Args:
            lines: Lines of the saved pattern file. Items are expected to
                look like ``POINT(<longitude> <latitude>)``.

        Returns:
            A ``(longitudes, latitudes)`` tuple of float lists for the first
            line that contains the largest number of ``POINT(`` items; two
            empty lists when there is nothing to parse.
        """
        if not lines:
            return [], []
        # max() returns the first maximal element, so ties keep the
        # earliest line.
        longest = max(lines, key=lambda line: line.count('POINT('))
        tokens = longest.replace('tid', '').replace(':', ' ').split()
        longitudes = []
        latitudes = []
        for token in tokens:
            if 'POINT(' in token:
                longitudes.append(float(token.replace('POINT(', '')))
            elif ')' in token:
                latitudes.append(float(token.replace(')', '')))
        return longitudes, latitudes

    def visualization(self):
        """Plot the longest saved pattern on an OpenStreetMap scatter map."""
        with open(self.patternFile, 'r', encoding='UTF-8') as f:
            lines = f.read().splitlines()
        longitudes, latitudes = self._longestPatternCoordinates(lines)
        if not latitudes:
            # Nothing was mined (or no item carries coordinates).
            print('No pattern with coordinates found in ' + self.patternFile)
            return
        figure = px.scatter_mapbox(
            # marker positions
            lat=latitudes,
            lon=longitudes,
            # drawing settings: equally sized markers on a fixed viewport
            size=[1] * len(latitudes),
            center={'lat': 34.686567, 'lon': 135.52000},
            zoom=4,
            height=600,
            width=800,
        )
        figure.update_layout(
            mapbox_style='open-street-map',
            margin={"r": 0, "t": 0, "l": 0, "b": 0},
            title_text="longest pattern",
        )
        figure.show()

# Script entry point: constructing MFP runs the complete pipeline
# (load, transactional conversion, mining, visualization) on the input CSV.
if __name__ == '__main__':
    inputCsv = 'airPollutionData.csv'
    MFP(inputCsv)

https://github.com/s1290216/PAMI