In [1]:
from google.colab import drive
drive.mount('/content/drive/',force_remount=True)

Mounted at /content/drive/


In [2]:
%cd "/content/drive/My Drive/KCAMF/Attacks"

/content/drive/My Drive/KCAMF/Attacks


# **Warming up**

In [3]:
!ls

attackDataset	    dataset		 MyTAAttacks.ipynb
attack.py	    GANattack.py	 randomAttack.py
averageAttack.py    getTargetsItem.py	 shillingfile.py
bandwagonAttack.py  LDos_LH.csv		 unorganizedMaliciousAttacks.py
config		    main.py
config.py	    MyLDosAttacks.ipynb


In [4]:
import os
path = '/content/drive/My Drive/KCAMF/Attacks/config/config.conf'
config = {}
print(path)
if not os.path.exists(path):
  print('config file is not found!')
  raise IOError
with open(path) as f:
  for ind,line in enumerate(f):
      if line.strip()!='':
          try:
              key,value=line.strip().split('=')
              config[key]=value
          except ValueError:
              print('config file is not in the correct format! Error Line:%d'%(ind))


/content/drive/My Drive/KCAMF/Attacks/config/config.conf


In [5]:
import os.path
class Config(object):
    def __init__(self,fileName):
        self.config = {}
        self.readConfiguration(fileName)

    def __getitem__(self, item):
        if not self.contains(item):
            print('parameter '+item+' is invalid!')
            exit(-1)
        return self.config[item]

    def getOptions(self,item):
        if not self.contains(item):
            print('parameter '+item+' is invalid!')
            exit(-1)
        return self.config[item]

    def contains(self,key):
        return self.config.__contains__(key)

    def readConfiguration(self,fileName):
        path = fileName
        print(path)
        if not os.path.exists(path):
            print('config file is not found!')
            raise IOError
        with open(path) as f:
            for ind,line in enumerate(f):
                if line.strip()!='':
                    try:
                        key,value=line.strip().split('=')
                        self.config[key]=value
                    except ValueError:
                        print('config file is not in the correct format! Error Line:%d'%(ind))

class LineConfig(object):
    def __init__(self,content):
        self.line = content.strip().split(' ')
        self.options = {}
        self.mainOption = False
        if self.line[0] == 'on':
            self.mainOption = True
        elif self.line[0] == 'off':
            self.mainOption = False
        for i,item in enumerate(self.line):
            if (item.startswith('-') or item.startswith('--')) and  not item[1:].isdigit():
                ind = i+1
                for j,sub in enumerate(self.line[ind:]):
                    if (sub.startswith('-') or sub.startswith('--')) and  not sub[1:].isdigit():
                        ind = j
                        break
                    if j == len(self.line[ind:])-1:
                        ind=j+1
                        break
                try:
                    self.options[item] = ' '.join(self.line[i+1:i+1+ind])
                except IndexError:
                    self.options[item] = 1


    def __getitem__(self, item):
        if not self.contains(item):
            print('parameter '+item+' is invalid!')
            exit(-1)
        return self.options[item]

    def getOption(self,key):
        if not self.contains(key):
            print('parameter '+key+' is invalid!')
            exit(-1)
        return self.options[key]

    def isMainOn(self):
        return self.mainOption

    def contains(self,key):
        return self.options.__contains__(key)




In [6]:
import os.path
from os import makedirs,remove
from re import compile,findall,split
#from config import LineConfig
from collections import defaultdict
class FileIO(object):
    def __init__(self):
        pass

    def writeFile(dir,file,content,op = 'w'):
        if not os.path.exists(dir):
            os.makedirs(dir)
        if type(content)=='str':
            with open(dir + file, op) as f:
                f.write(content)
        else:
            with open(dir+file,op) as f:
                f.writelines(content)

    def deleteFile(filePath):
        if os.path.exists(filePath):
            remove(filePath)

    def loadDataSet(conf, file, bTest=False):
        trainingData = defaultdict(dict)
        testData = defaultdict(dict)
        ratingConfig = LineConfig(conf['ratings.setup'])
        if not bTest:
            print('loading training data...')
        else:
            print('loading test data...')
        with open(file) as f:
            ratings = f.readlines()
        # ignore the headline
        if ratingConfig.contains('-header'):
            ratings = ratings[1:]
        # order of the columns
        order = ratingConfig['-columns'].strip().split()

        for lineNo, line in enumerate(ratings):
            items = split(' |,|\t', line.strip())
            if not bTest and len(order) < 3:
                print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
                exit(-1)    
            try:
                userId = items[int(order[0])]
                itemId = items[int(order[1])]
                if bTest and len(order)<3:
                    rating = 1 #default value
                else:
                    rating  = items[int(order[2])]

            except ValueError:
                print('Error! Have you added the option -header to the rating.setup?')
                exit(-1)
            if not bTest:
                trainingData[userId][itemId]=float(rating)
            else:
                testData[userId][itemId] = float(rating)
        if not bTest:
            return trainingData
        else:
            return testData

    def loadRelationship(conf, filePath):
        socialConfig = LineConfig(conf['social.setup'])
        relation = []
        print('loading social data...')
        with open(filePath) as f:
            relations = f.readlines()
            # ignore the headline
        if socialConfig.contains('-header'):
            relations = relations[1:]
        # order of the columns
        order = socialConfig['-columns'].strip().split()
        if len(order) <= 2:
            print('The social file is not in a correct format.')
        for lineNo, line in enumerate(relations):
            items = split(' |,|\t', line.strip())
            if len(order) < 2:
                print('The social file is not in a correct format. Error: Line num %d' % lineNo)
                exit(-1)
            userId1 = items[int(order[0])]
            userId2 = items[int(order[1])]
            if len(order) < 3:
                weight = 1
            else:
                weight = float(items[int(order[2])])
            relation.append([userId1, userId2, weight])
        return relation


    def loadLabels(filePath):
        labels = {}
        with open(filePath) as f:
            for line in f:
                items = split(' |,|\t', line.strip())
                labels[items[0]] = items[1]
        return labels

In [7]:
import os.path
from os import makedirs,remove
from re import compile,findall,split
from collections import defaultdict
class FileIO(object):
    def __init__(self):
        pass

    def writeFile(dir,file,content,op = 'w'):
        if not os.path.exists(dir):
            os.makedirs(dir)
        if type(content)=='str':
            with open(dir + file, op) as f:
                f.write(content)
        else:
            with open(dir+file,op) as f:
                f.writelines(content)

    def deleteFile(filePath):
        if os.path.exists(filePath):
            remove(filePath)

    def loadDataSet(conf, file, bTest=False):
        trainingData = defaultdict(dict)
        testData = defaultdict(dict)
        ratingConfig = LineConfig(conf['ratings.setup'])
        if not bTest:
            print('loading training data...')
        else:
            print('loading test data...')
        with open(file) as f:
            ratings = f.readlines()
        # ignore the headline
        if ratingConfig.contains('-header'):
            ratings = ratings[1:]
        # order of the columns
        order = ratingConfig['-columns'].strip().split()

        for lineNo, line in enumerate(ratings):
            items = split(' |,|\t', line.strip())
            if not bTest and len(order) < 3:
                print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
                exit(-1)    
            try:
                userId = items[int(order[0])]
                itemId = items[int(order[1])]
                if bTest and len(order)<3:
                    rating = 1 #default value
                else:
                    rating  = items[int(order[2])]

            except ValueError:
                print('Error! Have you added the option -header to the rating.setup?')
                exit(-1)
            if not bTest:
                trainingData[userId][itemId]=float(rating)
            else:
                testData[userId][itemId] = float(rating)
        if not bTest:
            return trainingData
        else:
            return testData

    def loadRelationship(conf, filePath):
        socialConfig = LineConfig(conf['social.setup'])
        relation = []
        print('loading social data...')
        with open(filePath) as f:
            relations = f.readlines()
            # ignore the headline
        if socialConfig.contains('-header'):
            relations = relations[1:]
        # order of the columns
        order = socialConfig['-columns'].strip().split()
        if len(order) <= 2:
            print('The social file is not in a correct format.')
        for lineNo, line in enumerate(relations):
            items = split(' |,|\t', line.strip())
            if len(order) < 2:
                print('The social file is not in a correct format. Error: Line num %d' % lineNo)
                exit(-1)
            userId1 = items[int(order[0])]
            userId2 = items[int(order[1])]
            if len(order) < 3:
                weight = 1
            else:
                weight = float(items[int(order[2])])
            relation.append([userId1, userId2, weight])
        return relation

    def loadLabels(filePath):
        labels = {}
        with open(filePath) as f:
            for line in f:
                items = split(' |,|\t', line.strip())
                labels[items[0]] = items[1]
        return labels

# **Attack Class**

In [8]:
from collections import defaultdict
import numpy as np
import random
import os
from os.path import abspath

class Attack(object):
    def __init__(self,conf):
        self.config = Config(conf)
        self.userProfile = FileIO.loadDataSet(self.config,self.config['ratings'])
        self.itemProfile = defaultdict(dict)
        self.attackSize = float(self.config['attackSize'])
        self.fillerSize = float(self.config['fillerSize'])
        self.selectedSize = float(self.config['selectedSize'])
        self.targetCount = int(self.config['targetCount'])
        self.targetScore = float(self.config['targetScore'])
        self.threshold = float(self.config['threshold'])
        self.minCount = int(self.config['minCount'])
        self.maxCount = int(self.config['maxCount'])
        self.minScore = float(self.config['minScore'])
        self.maxScore = float(self.config['maxScore'])
        self.outputDir = self.config['outputDir']
        if not os.path.exists(self.outputDir):
            os.makedirs(self.outputDir)
        for user in self.userProfile:
            for item in self.userProfile[user]:
                self.itemProfile[item][user] = self.userProfile[user][item]
        self.spamProfile = defaultdict(dict)
        self.spamItem = defaultdict(list) #items rated by spammers
        self.targetItems = []
        self.itemAverage = {}
        self.getAverageRating()
        self.selectTarget()
        self.startUserID = 0

    # def selectTarget(count = 20):
    #     pass
    # def reload(self):
    #     self.userProfile, self.itemProfile = loadRatings(self.config['ratings'])
    #     self.spamProfile = defaultdict(dict)
    #     self.spamItem = defaultdict(list)

    def getAverageRating(self):
        for itemID in self.itemProfile:
            li = self.itemProfile[itemID].values()
            self.itemAverage[itemID] = float(sum(li)) / len(li)


    def selectTarget(self,):
        print('Selecting target items...')
        #print('-')*80
        print('Target item       Average rating of the item')
        itemList = list(self.itemProfile.keys())
        sorted(itemList)
        targets = []
        '''
        with open('targets.txt') as f:
            content = f.readlines()
            for item in content:
                item = item.strip('\n')
                targets.append(item)
        #targets = ['3863','1388','1920','2084','204','2088','8861','99112','291','33836','1241','3705','5106','7367','46530','2402','102','2088','2709','1848']

        #targets = []
        #haha = []
        '''
        haha = []
        while len(self.targetItems) < self.targetCount:
            target = np.random.randint(len(itemList)) #generate a target order at random

            if len(self.itemProfile[str(itemList[target])]) < self.maxCount and len(self.itemProfile[str(itemList[target])]) > self.minCount \
                    and str(itemList[target]) not in self.targetItems \
                    and self.itemAverage[str(itemList[target])] <= self.threshold:
                self.targetItems.append(str(itemList[target]))
                haha.append(itemList[target])
                targets.append(itemList[target] + '\n')
                print(str(itemList[target]),'                  ',self.itemAverage[str(itemList[target])])
        
        for item in targets:
            self.targetItems.append(item)

        print(self.targetItems)
        
        
    def getFillerItems(self):
        mu = int(self.fillerSize*len(self.itemProfile))
        sigma = int(0.1*mu)
        markedItemsCount = abs(int(round(random.gauss(mu, sigma))))
        markedItems = np.random.randint(len(self.itemProfile), size=markedItemsCount)
        return markedItems.tolist()

    def insertSpam(self,startID=0):
        pass

    def loadTarget(self,filename):
        with open(filename) as f:
            for line in f:
                self.targetItems.append(line.strip())

    def generateLabels(self,filename):
        labels = []
        path = self.outputDir + filename
        with open(path,'w') as f:
            for user in self.spamProfile:
                labels.append(user+' 1\n')
            for user in self.userProfile:
                labels.append(user+' 0\n')
            f.writelines(labels)
        print('User profiles have been output to '+abspath(self.config['outputDir'])+'.')

    def generateProfiles(self,filename):
        ratings = []

        path = self.outputDir+filename
        with open(path, 'w') as f:
            for user in self.userProfile:
                for item in self.userProfile[user]:
                    ratings.append(user+' '+item+' '+str(self.userProfile[user][item])+'\n')

            for user in self.spamProfile:
                for item in self.spamProfile[user]:
                    ratings.append(user + ' ' + item + ' ' + str(self.spamProfile[user][item])+'\n')
            f.writelines(ratings)
        print('User labels have been output to '+abspath(self.config['outputDir'])+'.')
    
    def generateSpamProfiles(self,filename):
        #num_injected = int(len(self.userProfile)*self.attackSize * self.targetCount * len(self.getFillerItems()))
        num_injected = 1000
        print(num_injected)
        ratings = np.zeros((num_injected, 3))
        #print(len(ratings))
        i = 0
        path = self.outputDir+filename
        for user in self.spamProfile:
          for item in self.spamProfile[user]:
            ratings[i][0] = int(user)
            ratings[i][1] = int(item)
            ratings[i][2] = self.spamProfile[user][item]
            #print(i)
            i = i + 1
            #ratings.append(user + "," + item + "," + str(self.spamProfile[user][item]))
        
        print(ratings)
        ratings_df = pd.DataFrame(ratings)
        ratings_df.to_csv(path, header=False, index=False)

        '''
        with open(path, 'w') as f:
            for user in self.spamProfile:
                for item in self.spamProfile[user]:
                    ratings.append(user + "," + item + "," + 
                                   str(self.spamProfile[user][item]))
                    ratings.append("\n")
            f.writelines(ratings)
        '''
        print('Spam profiles have been output to '+abspath(self.config['outputDir'])+'.')



# **Random Attack**

In [9]:
import random
import numpy as np

class RandomAttack(Attack):
    def __init__(self,conf):
        super(RandomAttack, self).__init__(conf)


    def insertSpam(self,startID=0):
        print('Modeling random attack...')
        itemList = list(self.itemProfile.keys())
        
        if startID == 0:
            self.startUserID = len(self.userProfile)
        else:
            self.startUserID = startID

        for i in range(int(len(self.userProfile)*self.attackSize)):
            #fill 装填项目
            fillerItems = self.getFillerItems()
            #print(fillerItems)
            for item in fillerItems:
                self.spamProfile[str(self.startUserID)][str(itemList[item])] = random.randint(self.minScore,self.maxScore)

            #print(self.spamProfile)
            #target 目标项目
            #print(self.targetCount)
            for j in range(self.targetCount):
                #print(len(self.targetItems))
                target = np.random.randint(len(self.targetItems))
                self.spamProfile[str(self.startUserID)][self.targetItems[target]] = self.targetScore
                self.spamItem[str(self.startUserID)].append(self.targetItems[target])
            self.startUserID += 1
        
        print(self.spamProfile)


In [10]:
attack = RandomAttack('/content/drive/My Drive/KCAMF/Attacks/config/config.conf')
attack.insertSpam()
attack.generateProfiles('Rprofiles.csv')

/content/drive/My Drive/KCAMF/Attacks/config/config.conf
loading training data...
Selecting target items...
Target item       Average rating of the item
224                    3.3333333333333335
998                    3.5
1577                    3.5
855                    3.0
298                    3.1666666666666665
531                    3.4444444444444446
755                    3.5
46                    3.5
890                    3.4285714285714284
680                    3.375
['224', '998', '1577', '855', '298', '531', '755', '46', '890', '680', '224\n', '998\n', '1577\n', '855\n', '298\n', '531\n', '755\n', '46\n', '890\n', '680\n']
Modeling random attack...
defaultdict(<class 'dict'>, {'2370': {'893': 3, '1227': 4, '1154': 1, '233': 5, '111': 1, '50': 2, '680': 5.0, '1076': 1, '1141': 4, '1396': 3, '798': 1, '469': 2, '1269': 2, '1575': 5, '589': 4, '858': 2, '1385': 4, '770': 5, '1393': 1, '2074': 1, '743': 4, '647': 5, '1268': 3, '1493': 3, '1577\n': 5.0, '998\n': 5.0, '224': 5

# **Average Attack**

In [11]:
import random
import numpy as np

class AverageAttack(Attack):
    def __init__(self,conf):
        super(AverageAttack, self).__init__(conf)

    def insertSpam(self,startID=0):
        print('Modeling average attack...')
        itemList = list(self.itemProfile.keys())
        if startID == 0:
            self.startUserID = len(self.userProfile) + 1
        else:
            self.startUserID = startID

        for i in range(int(len(self.userProfile)*self.attackSize)):
            #fill 
            fillerItems = self.getFillerItems()
            for item in fillerItems:
                self.spamProfile[str(self.startUserID)][str(itemList[item])] = round(self.itemAverage[str(itemList[item])], 2)
            
            #target 
            for j in range(self.targetCount):
                target = np.random.randint(len(self.targetItems))
                self.spamProfile[str(self.startUserID)][self.targetItems[target]] = self.targetScore
                self.spamItem[str(self.startUserID)].append(self.targetItems[target])
            
            self.startUserID += 1
      
        print(self.spamProfile)

In [12]:
import pandas as pd

avg_attack = AverageAttack('/content/drive/My Drive/KCAMF/Attacks/config/config.conf')
avg_attack.insertSpam(startID = 5000)
avg_attack.generateSpamProfiles('Avgprofiles.csv')
attack_size = avg_attack.attackSize
target_count = avg_attack.targetCount

/content/drive/My Drive/KCAMF/Attacks/config/config.conf
loading training data...
Selecting target items...
Target item       Average rating of the item
855                    3.0
531                    3.4444444444444446
224                    3.3333333333333335
998                    3.5
680                    3.375
46                    3.5
755                    3.5
979                    3.2857142857142856
880                    3.375
422                    3.3333333333333335
['855', '531', '224', '998', '680', '46', '755', '979', '880', '422', '855\n', '531\n', '224\n', '998\n', '680\n', '46\n', '755\n', '979\n', '880\n', '422\n']
Modeling average attack...
defaultdict(<class 'dict'>, {'5000': {'711': 3.67, '1288': 4.0, '1012': 4.8, '961': 3.0, '189': 3.89, '1780': 4.33, '702': 4.43, '1036': 4.0, '781': 4.8, '1830': 4.33, '12': 4.25, '127': 4.08, '2238': 5.0, '1935': 4.0, '648': 3.6, '2131': 5.0, '833': 4.43, '1396': 4.0, '1005': 4.5, '680': 5.0, '755': 5.0, '224\n': 5.0, '755\n'

IndexError: ignored

In [None]:
import pandas as pd

ldos = pd.read_csv("/content/drive/My Drive/KCAMF/Attacks/dataset/LDos/LDos.csv")
ldos.head()

Unnamed: 0,userID,itemID,rating,time,daytype,season,location,weather,social,endEmo,dominantEmo,mood,physical,decision,interaction
0,23,14,5,3,2,2,1,1,1,2,2,2,1,1,1
1,21,5,3,2,2,2,1,1,2,2,2,2,2,2,2
2,21,6,4,4,2,2,1,1,2,2,2,1,1,2,1
3,22,13,4,3,2,3,2,2,3,2,3,1,1,1,1
4,21,7,3,4,2,2,1,1,2,1,7,1,1,2,1


In [None]:
len(ldos.userID.unique()), len(ldos.itemID.unique())

(121, 1232)

In [None]:
ldos_avg = pd.read_csv("/content/drive/My Drive/KCAMF/Attacks/attackDataset/Avgprofiles.csv", header=None)
ldos_avg.head()

Unnamed: 0,0,1,2
0,5000.0,4198.0,3.0
1,5000.0,3811.0,5.0
2,5000.0,3847.0,4.5
3,5000.0,3658.0,5.0
4,5000.0,4140.0,5.0


In [None]:
ldos_avg_df = pd.DataFrame(columns=ldos.columns)

len_profiles = len(ldos_avg[ldos_avg[0]!=0])

for cnt in range(len_profiles):
  ldos_avg_df.at[cnt, 'userID'] = int(ldos_avg.iloc[cnt,0])
  ldos_avg_df.at[cnt, 'itemID'] = int(ldos_avg.iloc[cnt,1])
  ldos_avg_df.at[cnt, 'rating'] = ldos_avg.iloc[cnt,2]

  z = ldos[ldos.itemID==int(ldos_avg.iloc[cnt,1])].values
  ldos_avg_df.at[cnt, 'time'] = z[0][3]
  ldos_avg_df.at[cnt, 'daytype'] = z[0][4]
  ldos_avg_df.at[cnt, 'season'] = z[0][5]
  ldos_avg_df.at[cnt, 'location'] = z[0][6]
  ldos_avg_df.at[cnt, 'weather'] = z[0][7]
  ldos_avg_df.at[cnt, 'social'] = z[0][8]
  ldos_avg_df.at[cnt, 'endEmo'] = z[0][9]
  ldos_avg_df.at[cnt, 'dominantEmo'] = z[0][10]
  ldos_avg_df.at[cnt, 'mood'] = z[0][11]
  ldos_avg_df.at[cnt, 'physical'] = z[0][12]
  ldos_avg_df.at[cnt, 'decision'] = z[0][13]
  ldos_avg_df.at[cnt, 'interaction'] = z[0][14]

In [None]:
ldos_all_avg = pd.concat([ldos, ldos_avg_df], ignore_index=True, sort=False)
fname = 'attackDataset/LDos_Avg_A_{}_T_{}.csv'.format(int(attack_size*100),target_count)
ldos_all_avg.to_csv(fname, index=False)

In [None]:
len(ldos), len(ldos_all_avg)

(2296, 2763)

# **Love/Hate Attack**

In [None]:
import random
import numpy as np

class LoveHateAttack(Attack):
    def __init__(self,conf):
        super(LoveHateAttack, self).__init__(conf)

    def insertSpam(self,startID=0):
        print('Modeling Love/Hate attack...')
        itemList = list(self.itemProfile.keys())
        if startID == 0:
            self.startUserID = len(self.userProfile) + 1
        else:
            self.startUserID = startID

        for i in range(int(len(self.userProfile)*self.attackSize)):
            #fill 
            fillerItems = self.getFillerItems()
            for item in fillerItems:
                self.spamProfile[str(self.startUserID)][str(itemList[item])] = 5.0
            
            #target 
            for j in range(self.targetCount):
                target = np.random.randint(len(self.targetItems))
                self.spamProfile[str(self.startUserID)][self.targetItems[target]] = 1.0
                self.spamItem[str(self.startUserID)].append(self.targetItems[target])
            
            self.startUserID += 1
      
        print(self.spamProfile)

In [None]:
import pandas as pd

lh_attack = LoveHateAttack('/content/drive/My Drive/KCAMF/Attacks/config/config.conf')
lh_attack.insertSpam(startID = 5000)
lh_attack.generateSpamProfiles('LoveHateprofiles.csv')
attack_size = lh_attack.attackSize
target_count = lh_attack.targetCount

/content/drive/My Drive/KCAMF/Attacks/config/config.conf
loading training data...
Selecting target items...
Target item       Average rating of the item
531                    3.4444444444444446
733                    3.142857142857143
880                    3.375
680                    3.375
979                    3.2857142857142856
755                    3.5
46                    3.5
224                    3.3333333333333335
890                    3.4285714285714284
298                    3.1666666666666665
['531', '733', '880', '680', '979', '755', '46', '224', '890', '298', '531\n', '733\n', '880\n', '680\n', '979\n', '755\n', '46\n', '224\n', '890\n', '298\n']
Modeling Love/Hate attack...
defaultdict(<class 'dict'>, {'5000': {'887': 5.0, '1279': 5.0, '1719': 5.0, '1687': 5.0, '2020': 5.0, '26': 5.0, '404': 5.0, '162': 5.0, '380': 5.0, '944': 5.0, '79': 5.0, '194': 5.0, '1552': 5.0, '509': 5.0, '898': 5.0, '1141': 5.0, '136': 5.0, '119': 5.0, '1823': 5.0, '459': 5.0, '1029': 5.0, '

IndexError: ignored

In [None]:
import pandas as pd

ldos = pd.read_csv("/content/drive/My Drive/KCAMF/Attacks/dataset/LDos/LDos.csv")
ldos.head()

Unnamed: 0,userID,itemID,rating,time,daytype,season,location,weather,social,endEmo,dominantEmo,mood,physical,decision,interaction
0,23,14,5,3,2,2,1,1,1,2,2,2,1,1,1
1,21,5,3,2,2,2,1,1,2,2,2,2,2,2,2
2,21,6,4,4,2,2,1,1,2,2,2,1,1,2,1
3,22,13,4,3,2,3,2,2,3,2,3,1,1,1,1
4,21,7,3,4,2,2,1,1,2,1,7,1,1,2,1


In [None]:
len(ldos.userID.unique()), len(ldos.itemID.unique())

(121, 1232)

In [None]:
ldos_lh = pd.read_csv("/content/drive/My Drive/KCAMF/Attacks/attackDataset/LoveHateprofiles.csv", header=None)
ldos_lh.head()

Unnamed: 0,0,1,2
0,5000.0,3653.0,5.0
1,5000.0,102.0,5.0
2,5000.0,4102.0,5.0
3,5000.0,122.0,5.0
4,5000.0,4303.0,5.0


In [None]:
ldos_lh_df = pd.DataFrame(columns=ldos.columns)

len_profiles = len(ldos_lh[ldos_lh[0]!=0])

for cnt in range(len_profiles):
  ldos_lh_df.at[cnt, 'userID'] = int(ldos_lh.iloc[cnt,0])
  ldos_lh_df.at[cnt, 'itemID'] = int(ldos_lh.iloc[cnt,1])
  ldos_lh_df.at[cnt, 'rating'] = ldos_lh.iloc[cnt,2]

  z = ldos[ldos.itemID==int(ldos_lh.iloc[cnt,1])].values
  ldos_lh_df.at[cnt, 'time'] = z[0][3]
  ldos_lh_df.at[cnt, 'daytype'] = z[0][4]
  ldos_lh_df.at[cnt, 'season'] = z[0][5]
  ldos_lh_df.at[cnt, 'location'] = z[0][6]
  ldos_lh_df.at[cnt, 'weather'] = z[0][7]
  ldos_lh_df.at[cnt, 'social'] = z[0][8]
  ldos_lh_df.at[cnt, 'endEmo'] = z[0][9]
  ldos_lh_df.at[cnt, 'dominantEmo'] = z[0][10]
  ldos_lh_df.at[cnt, 'mood'] = z[0][11]
  ldos_lh_df.at[cnt, 'physical'] = z[0][12]
  ldos_lh_df.at[cnt, 'decision'] = z[0][13]
  ldos_lh_df.at[cnt, 'interaction'] = z[0][14]

In [None]:
ldos_all_lh = pd.concat([ldos, ldos_lh_df], ignore_index=True, sort=False)
fname = 'attackDataset/LDos_LH_A_{}_T_{}.csv'.format(int(attack_size*100),target_count)
ldos_all_lh.to_csv(fname, index=False)

In [None]:
len(ldos), len(ldos_all_lh)

(2296, 2780)

# **Post**

In [None]:
import pandas as pd

ldos = pd.read_csv("/content/drive/My Drive/KCAMF/Attacks/dataset/LDos/LDos.csv")

In [None]:
import numpy as np

msk = np.random.rand(len(ldos)) < 0.8

ldos_train = ldos[msk]
ldos_test = ldos[~msk]

In [None]:
fname = 'dataset/LDos/LDos_train.csv'
ldos_train.to_csv(fname, index=False)

fname = 'dataset/LDos/LDos_test.csv'
ldos_test.to_csv(fname, index=False)

In [None]:
at_size = [0.01, 0.03, 0.05, 0.1, 0.15, 0.2]
tar_count = [1, 5, 10]

for attack_size in at_size:
  for target_count in tar_count:
    fname = 'attackDataset/LDos_Avg_A_{}_T_{}.csv'.format(int(attack_size*100),target_count)
    ldos_avg_df = pd.read_csv(fname)

    msk2 = msk
    arr2 = np.ones(len(ldos_avg_df) - len(msk), dtype=bool)
    msk2 = np.append(msk2, arr2) 

    ldos_avg_train_df = ldos_avg_df[msk2]

    fname = 'attackDataset/LDos_Avg_Train_A_{}_T_{}.csv'.format(int(attack_size*100),target_count)
    ldos_avg_train_df.to_csv(fname, index=False)

In [None]:
at_size = [0.01, 0.03, 0.05, 0.1, 0.15, 0.2]
tar_count = [1, 5, 10]

for attack_size in at_size:
  for target_count in tar_count:
    fname = 'attackDataset/LDos_LH_A_{}_T_{}.csv'.format(int(attack_size*100),target_count)
    ldos_lh_df = pd.read_csv(fname)

    msk2 = msk
    arr2 = np.ones(len(ldos_lh_df) - len(msk), dtype=bool)
    msk2 = np.append(msk2, arr2) 

    ldos_lh_train_df = ldos_lh_df[msk2]

    fname = 'attackDataset/LDos_LH_Train_A_{}_T_{}.csv'.format(int(attack_size*100),target_count)
    ldos_lh_train_df.to_csv(fname, index=False)

In [None]:
ldos_test = pd.read_csv("/content/drive/My Drive/KCAMF/Attacks/dataset/LDos/LDos_test.csv")
ldos_avg_20_10 = pd.read_csv("/content/drive/My Drive/KCAMF/Attacks/attackDataset/LDos_Avg_A_20_T_10.csv")

len(ldos_test), len(ldos_avg_20_10)

(438, 2763)

In [None]:
ldos_avg_20_10_new = pd.concat([ldos_avg_20_10, ldos_test])
ldos_avg_20_10_new = ldos_avg_20_10_new.drop_duplicates(keep=False)
len(ldos_avg_20_10_new)

2267

In [None]:
ldos_avg_20_10_new.to_csv('attackDataset/LDos_Avg_Train_A_20_T_10.csv', index=False)