# In [474]:
from os.path import abspath, join, dirname
import random
from urllib.request import urlopen
import collections
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
import datetime

# In [475]:
# Maps a data-kind key to the relative path of the helper file holding it.
# Every helper file lives under the 'helperFiles/' directory.
FILES = {
    key: 'helperFiles/' + basename
    for key, basename in (
        ('first:male', 'dist.male.first'),
        ('first:female', 'dist.female.first'),
        ('last', 'dist.all.last'),
        ('instruments', 'initial-instruments.txt'),
        ('genres', 'initial-genres.txt'),
        ('streets', 'london_streets.txt'),
        ('experience', 'experience.txt'),
        ('schools', 'schools.txt'),
        ('education', 'education.txt'),
        ('group', 'group_names.txt'),
        ('projects', 'projects.txt'),
        ('awards', 'awards.txt'),
        ('description', 'description.txt'),
    )
}

def get_id():
    """Return a new random identifier as 32 hexadecimal characters.

    The identifier is a version-4 UUID with the dashes removed. It is
    generated locally with the standard library, so no network access or
    HTML scraping is needed.

    Returns:
        str: 32 lowercase hex characters, e.g. '3f2b...9c1a'.
    """
    # Imported locally so this function is self-contained.
    import uuid

    return uuid.uuid4().hex

def get_name(filename):
    """Pick a name from a whitespace-separated distribution file.

    Each non-blank line must have four fields; the first is the name and
    the third is a number (presumably a cumulative percentage) that is
    compared with a random threshold drawn from [0, 90). The first name
    whose third field exceeds the threshold is returned.

    Args:
        filename: Path of the distribution file to read.

    Returns:
        The selected name exactly as written in the file. If no line
        exceeds the threshold, the last name read is returned so that
        callers do not receive None for a non-empty file. None is
        returned only when the file contains no names at all.
    """
    selected = random.random() * 90
    name = None
    with open(filename) as name_file:
        for line in name_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            name, _, cumulative, _ = fields
            if float(cumulative) > selected:
                return name
    # Threshold was above every value in the file: fall back to the last name.
    return name


def get_first_name(gender=None):
    """Return a capitalized random first name.

    Args:
        gender: 'male' or 'female'. Any other value (including None)
            makes the function pick one of the two at random.
    """
    genders = ('male', 'female')
    chosen = gender if gender in genders else random.choice(genders)
    raw_name = get_name(FILES['first:%s' % chosen])
    return raw_name.capitalize()


def get_last_name():
    """Return a capitalized random last name read from FILES['last']."""
    surname = get_name(FILES['last'])
    return surname.capitalize()


def get_full_name(gender=None):
    """Return 'First Last' built from a random first and last name.

    Args:
        gender: Forwarded unchanged to get_first_name.
    """
    first = get_first_name(gender)
    last = get_last_name()
    return u"%s %s" % (first, last)

def get_instrument_attributes(filename):
    """Pick a random instrument from a file of SQL INSERT statements.

    Every usable line is expected to contain "VALUES('<id>', '<name>', ..."
    and the id and name are extracted from the first two comma-separated
    values. Lines without "VALUES('" (blank lines, for instance) are
    ignored. The row is chosen uniformly among the rows actually present,
    so every row, including the first, can be selected regardless of how
    many rows the file has.

    Args:
        filename: Path of the instruments file.

    Returns:
        tuple: (instrument_id, instrument_name), or None when the file
        has no usable row.
    """
    with open(filename) as name_file:
        rows = [line for line in name_file if "VALUES('" in line]
    if not rows:
        return None
    values = random.choice(rows).split("VALUES('")[1].split(",")
    idAttribute, name = values[0], values[1]
    # idAttribute looks like "abc'" and name like " 'Guitar'": strip the quotes.
    return (idAttribute.split("'")[0], name.split("'")[1])
            
def get_genre_attributes(filename):
    """Pick a random genre from a file of SQL INSERT statements.

    Every usable line is expected to contain "VALUES('<id>', '<genre>'..."
    and the id and genre are extracted from the first two comma-separated
    values. Lines without "VALUES('" (blank lines, for instance) are
    ignored. The row is chosen uniformly among the rows actually present,
    so every row, including the first, can be selected regardless of how
    many rows the file has.

    Args:
        filename: Path of the genres file.

    Returns:
        tuple: (genre_id, genre_name), or None when the file has no
        usable row.
    """
    with open(filename) as name_file:
        rows = [line for line in name_file if "VALUES('" in line]
    if not rows:
        return None
    values = random.choice(rows).split("VALUES('")[1].split(",")
    idAttribute, genre = values[0], values[1]
    # idAttribute looks like "abc'" and genre like " 'Rock');": strip the quotes.
    return (idAttribute.split("'")[0], genre.split("'")[1])

def get_instrument():
    """Return a random instrument read from the file at FILES['instruments']."""
    instruments_path = FILES['instruments']
    return get_instrument_attributes(instruments_path)

def get_genre():
    """Return a random genre read from the file at FILES['genres']."""
    genres_path = FILES['genres']
    return get_genre_attributes(genres_path)

# Lookup tables loaded from the helper files. Each one maps a zero-based
# line index to the text of that line with its trailing newline removed.
(streetNames, groupNames, projectsNames, educationNames,
 experienceNames, schoolNames, awardNames, descriptionNames) = (
    {} for _ in range(8)
)

with open(FILES['streets']) as source:
    streetNames.update(enumerate(row.split("\n")[0] for row in source))

with open(FILES['experience']) as source:
    experienceNames.update(enumerate(row.split("\n")[0] for row in source))

with open(FILES['schools']) as source:
    schoolNames.update(enumerate(row.split("\n")[0] for row in source))

with open(FILES['education']) as source:
    educationNames.update(enumerate(row.split("\n")[0] for row in source))

with open(FILES['group']) as source:
    groupNames.update(enumerate(row.split("\n")[0] for row in source))

with open(FILES['projects']) as source:
    projectsNames.update(enumerate(row.split("\n")[0] for row in source))

with open(FILES['awards']) as source:
    awardNames.update(enumerate(row.split("\n")[0] for row in source))

with open(FILES['description']) as source:
    descriptionNames.update(enumerate(row.split("\n")[0] for row in source))

# Entry counts, cached once so the random pickers need not recompute them.
streetNamesLen = len(streetNames)
groupNamesLen = len(groupNames)
projectsNamesLen = len(projectsNames)
educationNamesLen = len(educationNames)
experienceNamesLen = len(experienceNames)
schoolNamesLen = len(schoolNames)
awardNamesLen = len(awardNames)
descriptionNamesLen = len(descriptionNames)

def get_location():
    """Find a random London address that the geocoder can resolve.

    Up to 1000 random (street name, house number) pairs are tried. For
    each pair the address is geocoded first as "<num> <street> Road
    London" and, if that gives no result or fails, as "<num> <street> St
    London".

    Returns:
        tuple: (address_without_city, latitude, longitude) for the first
        address that resolves, or None when no attempt succeeds or there
        are no street names to choose from.
    """
    if not streetNamesLen:
        return None

    # One geocoder client is enough; it does not depend on the address.
    geolocator = Nominatim(user_agent="specify_your_app_name_here")

    for _ in range(1000):
        # Valid keys are 0 .. streetNamesLen - 1.
        streetName = streetNames[random.randrange(streetNamesLen)]
        num = random.randint(1, 100)
        for suffix in ("Road", "St"):
            finalPlace = str(num) + " " + streetName + " " + suffix
            try:
                location = geolocator.geocode(finalPlace + " London")
            except Exception:
                # Best effort: a failed lookup just moves on to the next
                # candidate. KeyboardInterrupt/SystemExit are not caught,
                # so the loop can still be interrupted.
                continue
            if location is not None:
                return (finalPlace, location.latitude, location.longitude)
    return None

def get_image_by_instrument(instrumentList):
    """Return an image URL scraped from Unsplash for one of the instruments.

    The instruments are tried in order. For each one the Unsplash search
    page is downloaded, candidate snippets are collected from the text
    that follows every occurrence of "regular" (cut at the next "small"),
    and one candidate is chosen at random. The first instrument that
    yields a URL wins.

    Args:
        instrumentList: Iterable of instrument names (strings).

    Returns:
        str: The extracted image URL, or None when no instrument
        produced one (including when instrumentList is empty).
    """
    # Imported locally so this function is self-contained.
    from urllib.parse import quote

    for instrument in instrumentList:
        try:
            # Percent-encode the name so that instruments containing
            # spaces or other special characters form a valid URL.
            url = 'https://unsplash.com/s/photos/' + quote(instrument)
            with urlopen(url) as page:
                soup = BeautifulSoup(page, "html.parser")
            chunks = str(soup).split("regular")[1:]
            imagesSet = {chunk.split("small")[0] for chunk in chunks}
            return random.choice(tuple(imagesSet)).split('"')[2][:-1]
        except Exception:
            # Best effort: download or parse problems just move on to the
            # next instrument. KeyboardInterrupt/SystemExit are not caught.
            continue
    return None

def get_years(numberOfInstances):
    """Build chained, backwards-running date ranges as timestamp strings.

    The first range ends in the previous year and starts 2-4 years before
    the current year. Each further range ends where the previous one
    started and lasts another 2-4 years. All dates share one random month
    and use day 01 at 00:00:00.

    Args:
        numberOfInstances: Number of additional ranges after the first.

    Returns:
        tuple: (start_dates, end_dates), two lists of
        numberOfInstances + 1 strings each, index-aligned.
    """
    stamp = "{}-{}-01 00:00:00"
    currentYear = datetime.datetime.now().year
    month = random.randint(1, 12)
    span = random.randint(2, 4)

    cursor = currentYear - span
    starts = [stamp.format(cursor, month)]
    ends = [stamp.format(currentYear - 1, month)]

    for _ in range(numberOfInstances):
        span = random.randint(2, 4)
        # This range ends at the start of the range generated before it.
        ends.append(stamp.format(cursor, month))
        cursor -= span
        starts.append(stamp.format(cursor, month))
    return (starts, ends)

def get_random_school():
    """Return one randomly chosen entry of schoolNames."""
    index = random.randint(0, schoolNamesLen - 1)
    return schoolNames[index]
    
def get_random_experience():
    """Return one randomly chosen entry of experienceNames."""
    index = random.randint(0, experienceNamesLen - 1)
    return experienceNames[index]

def get_random_education():
    """Return one randomly chosen entry of educationNames."""
    index = random.randint(0, educationNamesLen - 1)
    return educationNames[index]

def get_random_project():
    """Return one randomly chosen entry of projectsNames."""
    index = random.randint(0, projectsNamesLen - 1)
    return projectsNames[index]

def get_random_group_name():
    """Return one randomly chosen entry of groupNames."""
    index = random.randint(0, groupNamesLen - 1)
    return groupNames[index]

def get_random_award():
    """Return one randomly chosen entry of awardNames."""
    index = random.randint(0, awardNamesLen - 1)
    return awardNames[index]

def get_random_description():
    """Return one randomly chosen entry of descriptionNames."""
    index = random.randint(0, descriptionNamesLen - 1)
    return descriptionNames[index]

# In [476]:
# Creation of the variables that describe one random musician.
name = get_first_name()
lastName = get_last_name()
fullName = "{} {}".format(name, lastName)

# Five fresh identifiers, generated in this order.
accountId, profileId, locationId, facebookId, instagramId = (
    get_id() for _ in range(5)
)

# Lower-cased "firstlast" string shared by the e-mail and web addresses.
loweredFullName = name.lower() + lastName.lower()
email = "{}@playtodayapp.com".format(loweredFullName)
web = "http://www.{}music.com".format(loweredFullName)
# Names of the SQL session variables that will hold the two ids.
accountIdStr = "@{}AccountId".format(name.lower())
profileIdStr = "@{}ProfileId".format(name.lower())
rating = round(random.uniform(4, 10))
distance = random.randint(0, 20)

# Get instruments and genres: 2 to 4 of each, as (id, name) pairs.
instrumentPicks = [get_instrument() for _ in range(random.randint(2, 4))]
instrumentsIdList = [picked[0] for picked in instrumentPicks]
instrumentsList = [picked[1] for picked in instrumentPicks]

genrePicks = [get_genre() for _ in range(random.randint(2, 4))]
genresIdList = [picked[0] for picked in genrePicks]
genresList = [picked[1] for picked in genrePicks]

genresListStr = ', '.join(genresList)
jobTitle = ' | '.join(instrumentsList)
city = "London"
adress, latitude, longitude = get_location()

image = get_image_by_instrument(instrumentsList)

# How many history entries of each kind to generate, with their date ranges.
educationNumber = random.randint(2, 4)
educationYearsList = get_years(educationNumber)
experienceNumber = random.randint(0, 5)
experienceYearsList = get_years(experienceNumber)
projectsNumber = random.randint(0, 2)
projectsYearsList = get_years(projectsNumber)
# 1 award unless the draw from 0..10 is below 3, in which case 0.
awardsNumber = int(random.randint(0, 10) >= 3)
awardsYearsList = get_years(awardsNumber)

# Human-readable enumerations of the form "a, b and c".
instrumentsListed = ", ".join(instrumentsList[:-1]) + " and " + instrumentsList[-1]
genresListed = ", ".join(genresList[:-1]) + " and " + genresList[-1]
description = get_random_description().format(name, instrumentsListed, genresListed)

# In [477]:
# File writing: emit the SQL script that inserts the generated musician.
def _sql_text(value):
    """Return value as text usable inside a single-quoted SQL literal.

    Single quotes are doubled so that a value containing an apostrophe
    does not terminate the literal early.
    """
    return str(value).replace("'", "''")


fileName = "{}_user.sql".format(name)
# The context manager closes the file even if one of the writes fails.
with open(fileName, "w") as file:
    file.write("/* ================== {} {} - MUSICIAN ==================*/\n".format(name.upper(), lastName.upper()))
    file.write("SET {} = '{}';\n".format(accountIdStr, accountId))
    file.write("SET {} = '{}';\n\n".format(profileIdStr, profileId))

    file.write("-- Account and profile\n")
    file.write("INSERT INTO USER_ACCOUNT(id, account_enabled, account_locked,  auth_provider, credentials_expired, email, language, password, social_id, version) VALUES ({}, true, false, 'EMAIL', false, '{}', 'en', '$2a$10$p1Wx2QdGDunRsXs/yrAuHOnx3n8wktjGn55cFC6mz4HYx1fryb8n2', null, 0);\n".format(accountIdStr, _sql_text(email)))
    file.write("INSERT INTO PROFILE(id, description, distance, image_url, name ,version, category, email, job_title, rating, web_url, account_id) VALUES ({}, '{}', {}, '{}', '{}', 0, 'MUSICIAN', '{}', '{}', {}, '{}', {});\n".format(profileIdStr, _sql_text(description), distance, _sql_text(image), _sql_text(fullName), _sql_text(email), _sql_text(jobTitle), rating, _sql_text(web), accountIdStr))
    file.write("INSERT INTO MUSICIAN_PROFILE(id) VALUES({});\n\n".format(profileIdStr))
    file.write("INSERT INTO CARD(id, coretag_type, top_attr, middle_attr, bottom_attr, distance, image_url, name, owner,  coretag_subtype, bottom_right_attr, version) values({}, 'PROFILE', '{}', '{}', '{}', null, '{}', '{}',  {}, 'MUSICIAN', null, 0);\n".format(profileIdStr, _sql_text(jobTitle), _sql_text(genresListStr), _sql_text(city), _sql_text(image), _sql_text(fullName), profileIdStr))

    file.write("-- Locations\n")
    file.write("INSERT INTO PROFILE_LOCATION(id, address, city, latitude, longitude, latitude_delta, longitude_delta, selected, profile_id) VALUES ('{}', '{}', '{}', {}, {}, 0.0, 0.0, true, {});\n".format(locationId, _sql_text(adress), _sql_text(city), latitude, longitude, profileIdStr))

    file.write("-- Instruments\n")
    for instrumentId in instrumentsIdList:
        instrumentLevel = random.randint(5, 10)
        instrumentIsTeachable = str(random.choice([True, False]))
        file.write("INSERT INTO PROFILE_INSTRUMENT(level, teachable, instrument_id, profile_id) VALUES({}, {}, '{}', {});\n".format(instrumentLevel, instrumentIsTeachable, instrumentId, profileIdStr))

    file.write("-- Genres\n")
    for genreId in genresIdList:
        file.write("INSERT INTO PROFILE_GENRE(genre_id, profile_id) VALUES ('{}', {});\n".format(genreId, profileIdStr))

    file.write("-- Social\n")
    file.write("INSERT INTO SOCIAL_MEDIA_ITEM(id, name, media_type, profile_id, social_access_token, social_id, auth, enabled) VALUES('{}', '{}', 'FACEBOOK',  {}, 'token', 'socialId', false, true);\n".format(facebookId, _sql_text(name.lower() + lastName + "FB"), profileIdStr))
    file.write("INSERT INTO SOCIAL_MEDIA_ITEM(id, name, media_type, profile_id, social_access_token, social_id, auth, enabled) VALUES('{}',  '{}', 'INSTAGRAM', {}, 'token', 'socialId', false, true);\n".format(instagramId, _sql_text(name.lower() + lastName + "IG"), profileIdStr))

    # In each *YearsList, index 0 holds the start dates and index 1 the
    # matching end dates.
    file.write("-- Education\n")
    for i in range(educationNumber):
        educationId = get_id()
        yearEnd = educationYearsList[1][i]
        yearIni = educationYearsList[0][i]
        school = get_random_school()
        title = get_random_education()
        file.write("INSERT INTO EDUCATION_EXPERIENCE(id, date_end, date_start, school_name, title, profile_id) VALUES ('{}', '{}', '{}', '{}', '{}', {});\n".format(educationId, yearEnd, yearIni, _sql_text(school), _sql_text(title), profileIdStr))

    file.write("-- Professional experience\n")
    for i in range(experienceNumber):
        experienceId = get_id()
        yearEnd = experienceYearsList[1][i]
        yearIni = experienceYearsList[0][i]
        companyName = get_random_group_name()
        title = get_random_experience()
        file.write("INSERT INTO PROFESSIONAL_EXPERIENCE(id, company_name, date_end, date_start, title, profile_id) VALUES('{}', '{}', '{}', '{}', '{}', {});\n".format(experienceId, _sql_text(companyName), yearEnd, yearIni, _sql_text(title), profileIdStr))

    file.write("-- Projects experience\n")
    for i in range(projectsNumber):
        projectsId = get_id()
        yearEnd = projectsYearsList[1][i]
        yearIni = projectsYearsList[0][i]
        organizationName = get_random_group_name()
        title = get_random_project()
        file.write("INSERT INTO PROJECT_EXPERIENCE(id, date_end, date_start, organization_name, title, profile_id) VALUES('{}', '{}', '{}', '{}', '{}', {});\n".format(projectsId, yearEnd, yearIni, _sql_text(organizationName), _sql_text(title), profileIdStr))

    file.write("-- Awards experience\n")
    for i in range(awardsNumber):
        awardId = get_id()
        yearEnd = awardsYearsList[1][i]
        yearIni = awardsYearsList[0][i]
        organizationName = get_random_school()
        title = get_random_award()
        file.write("INSERT INTO AWARD_EXPERIENCE(id, date_end, date_start, organization_name, title, profile_id) VALUES('{}', '{}', '{}', '{}', '{}', {});\n".format(awardId, yearEnd, yearIni, _sql_text(organizationName), _sql_text(title), profileIdStr))