In [79]:
import pandas as pd
import requests
import json
from my_library import usage_df

In [80]:
def _split_on_double_slash(cell):
    """Turn a string cell into its list of '//'-separated parts; return any other value as-is."""
    if isinstance(cell, str):
        return cell.split('//')
    return cell


# Element-wise over the whole frame: every string cell becomes a list of parts.
usage_df = usage_df.map(_split_on_double_slash)

# These three columns are collapsed back to their first part only.
# na_action='ignore' makes map skip missing values instead of indexing into them.
for _column in ('name', 'purpose', 'functionality'):
    usage_df[_column] = usage_df[_column].map(lambda parts: parts[0], na_action='ignore')

def reduce(array):
    """Flatten one level: concatenate the lists inside `array` into a single new list.

    The previous recursive form used one stack frame and one slice copy per
    element, so it raised RecursionError once `array` had about a thousand
    entries and did quadratic copying before that. This version is a plain
    loop with the same result for a list of lists.

    Note: the name shadows nothing imported in this notebook, but it is not
    functools.reduce; it is kept because other cells call it by this name.

    Args:
        array: sequence of lists (an empty sequence is allowed).

    Returns:
        A new list holding every element of every inner list, in order.
        The input and its inner lists are not modified.
    """
    flattened = []
    for chunk in array:
        flattened.extend(chunk)
    return flattened


def convert(field):
    """Return column `field` of `usage_df` as a list with one list per row.

    Rows whose value is already a list are kept as they are; any other value
    (for example a missing value) is replaced by an empty list so the result
    can be flattened safely.

    Args:
        field: column label to read from `usage_df`.

    Returns:
        list of lists, in row order.
    """
    return [value if isinstance(value, list) else [] for value in usage_df[field]]

# Distinct values found in each list-valued column, pooled across all rows:
# convert() yields one list per row, reduce() flattens them, set() de-duplicates.
platform_set = set(reduce(convert('platforms')))
prog_lang_set = set(reduce(convert('programming_languages')))
categories_set = set(reduce(convert('categories')))
technologies_set = set(reduce(convert('technologies')))

# One text payload per vocabulary: the repr of a Python list, e.g. "['Python', 'python']".
# NOTE(review): element order inside each payload follows set iteration order,
# which is not guaranteed to be the same from one kernel session to the next.
strings = [
    str(list(vocabulary))
    for vocabulary in (prog_lang_set, categories_set, technologies_set, platform_set)
]

In [81]:
# System prompt asking the model to choose, from a list of spelling variants,
# the canonical spelling of each distinct item and return them under "keys".
# The example input now contains every string that the example output returns:
# previously the output listed "JavaScript" although the input never did, which
# contradicted the instruction to *pick* one of the given strings.
system_prompt1 = '''A malevolous entity of mischief has taken uncorrupted strings of text and made copies of them with a couple errors.
Your task is to pick the correct version of these strings.

Example input:

['C#','Csharp', 'c#', 'python', 'Python', 'Python3', 'JavaScript', 'JS', 'javascript']

Output:

{
  "keys": ["C#",
  "Python",
  "JavaScript"]
}

Rules:
1. The correct strings are always capitalized
2. The output must be in json format
'''

# System prompt asking the model to map each canonical string to the list of
# its corrupted variants.
# Fixes relative to the earlier text:
#   * the example is now valid JSON (double-quoted keys and values), so a
#     response that imitates it can be parsed with json.loads;
#   * "js. javascript" (a period typed instead of a comma) is split into the two
#     variants it was meant to be;
#   * rule 2 used to forbid two variants sharing one canonical string, which the
#     example itself does; it now forbids one variant mapping to several
#     canonical strings;
#   * rule 3 states the JSON requirement explicitly, as system_prompt1 does.
system_prompt2 = '''A malevolous entity of mischief has taken uncorrupted strings of text and made copies of them with a couple errors.
Your task is to make a correspondence between the correct strings and the incorrect ones in the following way:

Example output:

{
  "C#": ["csharp", "c#", "CSHARP", "Csharp"],
  "C++": ["cplusplus", "c++", "CPP", "cpp"],
  "Javascript": ["js", "javascript", "jscript"]
}

Rules:
1. The correct strings are always capitalized
2. No incorrect string can correspond to more than one correct string
3. The output must be in json format
'''

In [83]:
url = "http://localhost:1234/v1/chat/completions"
headers = {"Content-Type": "application/json"}
responses = []

# (connect, read) timeouts in seconds. Without a timeout requests waits forever
# on a stalled server and the cell can only be stopped by interrupting the kernel.
# The read timeout is generous because a completion may take a while to generate.
REQUEST_TIMEOUT = (10, 300)

# The output file is opened once, before the loop. Opening it with mode 'w'
# inside the loop truncated it on every iteration, so only the answer to the
# last prompt was ever left on disk. Each answer is now written on its own
# line and flushed, so answers already received survive an interrupted run.
with open('keys', 'w', encoding='utf-8') as file:
    for string in strings:
        payload = {
            "messages": [
                {"role": "system", "content": system_prompt1},
                {"role": "user", "content": string}
            ],
            "temperature": 0.2,
            # NOTE(review): 400 tokens may cut a long JSON answer short; confirm
            # against the size of the largest vocabulary.
            "max_tokens": 400,
            "stop": ["\n\n", "[/INST]"],
        }

        reply = requests.post(url, json=payload, headers=headers, timeout=REQUEST_TIMEOUT)
        reply.raise_for_status()  # fail on HTTP errors instead of parsing an error body
        answer = reply.json()['choices'][0]['message']['content']
        responses.append(answer)
        file.write(answer + '\n')
        file.flush()

KeyboardInterrupt: 

In [None]:
# Scratch check: parse a hand-pasted canonical-name -> variants mapping to
# confirm it is valid JSON. The parsed dict is not bound to a name; as the
# cell's last expression it is only displayed as the cell output.
# presumably this text is a model response pasted in by hand - TODO confirm.
# NOTE(review): "js. javascript" looks like two variants fused into one string,
# and several lists repeat an entry or contain the canonical key itself;
# confirm before using this mapping for anything.
json.loads('''
{
  "C#": [
    "csharp",
    "C#",
    "CSharp",
    "C#",
    "CSHARP"
  ],
  "C++": [
    "cplusplus",
    "c++",
    "CPP",
    "cpp",
    "C++"
  ],
  "Javascript": [
    "js. javascript",
    "jscript",
    "javascript",
    "Javascript",
    "JavaScript"
  ],
  "Python": [
    "python",
    "PYTHON",
    "python3",
    "py",
    "python"
  ],
  "Java": [
    "java",
    "JAVA",
    "Java",
    "jva",
    "java"
  ],
  "PHP": [
    "php",
    "PHP",
    "Php",
    "pHP",
    "php"
  ],
  "Go": [
    "go",
    "GO",
    "GoLang",
    "golang",
    "go"
  ],
  "Ruby": [
    "ruby",
    "RUBY",
    "Ruby",
    "rb",
    "ruby"
  ],
  "Swift": [
    "swift",
    "Swift",
    "SwiftLang",
    "swiftlang",
    "swift"
  ],
  "Kotlin": [
    "kotlin",
    "Kotlin",
    "kotlinlang",
    "kotlin",
    "KOTLIN"
  ],
  "Rust": [
    "rust",
    "Rust",
    "rustlang",
    "rust",
    "R"]
}
''')

{'C#': ['csharp', 'C#', 'CSharp', 'C#', 'CSHARP'],
 'C++': ['cplusplus', 'c++', 'CPP', 'cpp', 'C++'],
 'Javascript': ['js. javascript',
  'jscript',
  'javascript',
  'Javascript',
  'JavaScript'],
 'Python': ['python', 'PYTHON', 'python3', 'py', 'python'],
 'Java': ['java', 'JAVA', 'Java', 'jva', 'java'],
 'PHP': ['php', 'PHP', 'Php', 'pHP', 'php'],
 'Go': ['go', 'GO', 'GoLang', 'golang', 'go'],
 'Ruby': ['ruby', 'RUBY', 'Ruby', 'rb', 'ruby'],
 'Swift': ['swift', 'Swift', 'SwiftLang', 'swiftlang', 'swift'],
 'Kotlin': ['kotlin', 'Kotlin', 'kotlinlang', 'kotlin', 'KOTLIN'],
 'Rust': ['rust', 'Rust', 'rustlang', 'rust', 'R']}

In [None]:
# Scratch check: parse a hand-pasted canonical-name -> variants mapping to
# confirm it is valid JSON. The parsed dict is not bound to a name; as the
# cell's last expression it is only displayed as the cell output.
# The pasted text stopped after the "Node.js" entry with a trailing comma and no
# closing brace, so json.loads always raised JSONDecodeError. The trailing comma
# was removed and the object closed by hand; no entries were added or altered.
# presumably the original response was cut off by the token limit - TODO confirm.
# NOTE(review): "js. javascript" looks like two variants fused into one string.
json.loads('''
{
  "C#": [
    "csharp",
    "c#",
    "CSHARP",
    "Csharp",
    "C#",
    "C#"
  ],
  "C++": [
    "cplusplus",
    "c++",
    "CPP",
    "cpp"
  ],
  "Javascript": [
    "js. javascript",
    "jscript"
  ],
  "TypeScript": [
    "TypeScript",
    "typescript"
  ],
  "Python": [
    "python",
    "python3",
    "PYTHON",
    "PYTHON"
  ],
  "Java": [
    "JAVA",
    "java"
  ],
  "PHP": [
    "php",
    "PHP"
  ],
  "Ruby": [
    "RUBY",
    "ruby"
  ],
  "Swift": [
    "swift",
    "SWIFT"
  ],
  "Kotlin": [
    "kotlin",
    "Kotlin"
  ],
  "Rust": [
    "rust",
    "Rust"
  ],
  "Go": [
    "go",
    "Go"
  ],
  "HTML": [
    "html",
    "HTML"
  ],
  "CSS": [
    "css",
    "CSS"
  ],
  "SQL": [
    "sql",
    "SQL"
  ],
  "Bash": [
    "bash",
    "Shell",
    "ZSH"
  ],
  "Node.js": [
    "node.js",
    "nodejs"
  ]
}
''')

In [None]:
# Scratch check: parse a hand-pasted category mapping to confirm it is valid
# JSON. The result is discarded: it is neither assigned nor the cell's last
# expression, so the only effect is an exception if the text does not parse.
# presumably this text is a model response pasted in by hand - TODO confirm.
json.loads('''
{
  "DATA-ENGINEERING": [
    "data-engineering",
    "Data Engineering",
    "Data-Engineering",
    "Data Engineering"
  ],
  "DEPENDENCY-MANAGER": [
    "dependency-manager",
    "Dependency Manager",
    "Dependency-Manager",
    "Dependency Manager"
  ],
  "COMMUNITY": [
    "community",
    "Community",
    "COMMUNITY",
    "Community"
  ],
  "WEBDEV": [
    "webdev",
    "WebDev",
    "Webdev",
    "Web Development"
  ],
  "OCR": [
    "ocr",
    "OCR",
    "Ocr",
    "Optical Character Recognition"
  ],
  "AI-MACHINELEARNING": [
    "ai-machinelearning",
    "AI/MachineLearning",
    "AI Machine Learning",
    "AI/MACHINELEARNING"
  ],
  "TESTING": [
    "Testing",
    "testing",
    "TESTING",
    "Test"
  ],
  "OPERATING-SYSTEM": [
    "operating-system",
    "Operating System",
    "Operating-System",
    "OS"
  ],
  "SPRINGCLOUD": [
    "springcloud",
    "SpringCloud",
    "Spring Cloud",
    "SpringCloud"
  ],
  "TOOL": [
    "Tool",
    "tool",
    "TOOL",
    "Tools"
  ],
  "MUSIC": [
    "music",
    "Music",
    "MUSIC",
    "Musical"
  ],
  "GRAPHICS": [
    "graphics",
    "Graphics"
  ]
}
''')

# Last expression of the cell: displays the raw category vocabulary for
# comparison with the mapping parsed above.
categories_set

{'AI/MACHINELEARNING',
 'AI/MachineLearning',
 'ANIMATION',
 'Algorithms',
 'Api',
 'Business',
 'DATA',
 'DEV OPS',
 'DEVELOPMENT',
 'DEVOPSS',
 'DEVOPs',
 'Data',
 'Desktop',
 'DevOps',
 'Developer-Tools',
 'HOME-AUTOMATION',
 'IOT',
 'LEARNING',
 'Learning',
 'MOBILE',
 'MediaReproduction',
 'Music',
 'NLP',
 'OTHER',
 'Other',
 'PROGRAMMINGLIBRARY',
 'Productivity',
 'ProgrammingLibrary',
 'PureMaths',
 'Research',
 'Screen-capture',
 'Security',
 'TOOL',
 'Testing',
 'Tool',
 'VPN',
 'WEB',
 'Web',
 'accessibility',
 'admin-dashboard',
 'ai-machinelearning',
 'ai/machinelearning',
 'algorithms',
 'android',
 'animation',
 'api',
 'api-gateway',
 'artificial-intelligence',
 'automation',
 'backend',
 'backup',
 'bittorrent',
 'blockchain',
 'business-intelligence',
 'businessintelligence',
 'cli',
 'collaboration',
 'command-line-tool',
 'commerce',
 'community',
 'component-library',
 'configuration-management',
 'content-management-system',
 'covid',
 'crm',
 'cross-platform',
 '