In [1]:
!pip install SPARQLWrapper



In [2]:
import random
import csv

from SPARQLWrapper import JSON, SPARQLWrapper
from collections import defaultdict


# Public Wikidata SPARQL endpoint that every query in this notebook is sent to.
WIKIDATA_ENDPOINT = "https://query.wikidata.org/sparql"

# Shared client used by query_sparql(); it now reads the endpoint from the
# constant above instead of repeating the URL.
# NOTE(review): the agent string imitates a desktop browser. Wikimedia's
# User-Agent policy asks for a descriptive agent with contact details --
# consider replacing it.
sparql = SPARQLWrapper(WIKIDATA_ENDPOINT, agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")


# PREFIX declarations that query_sparql() prepends to every query.
prefix = """
	PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wds: <http://www.wikidata.org/entity/statement/>
    PREFIX wdv: <http://www.wikidata.org/value/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX wikibase: <http://wikiba.se/ontology#>
    PREFIX p: <http://www.wikidata.org/prop/>
    PREFIX ps: <http://www.wikidata.org/prop/statement/>
    PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX bd: <http://www.bigdata.com/rdf#>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
"""

# Number of shuffled examples that the data builders hold out as the test split.
test_num = 100

def write_csv(fname, data):
    """Write ``data`` (an iterable of rows) to ``fname`` as CSV.

    The file is opened with ``newline=""`` as the ``csv`` module requires, so
    the writer controls line endings itself and no blank rows appear on
    platforms that translate newlines. UTF-8 is used explicitly so that
    non-ASCII labels are written the same way regardless of the locale.

    Args:
        fname: Path of the file to create or overwrite.
        data: Iterable of rows; each row is an iterable of cell values.
    """
    with open(fname, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(data)

def query_sparql(query):
    """Run ``query`` against the shared ``sparql`` client and return the result.

    The module-level ``prefix`` block is prepended to ``query`` before it is
    sent, and the response is requested as JSON and converted by the client.

    Args:
        query: SPARQL query text without PREFIX declarations.

    Returns:
        Whatever ``sparql.query().convert()`` yields for a JSON response.
    """
    full_query = prefix + query
    sparql.setQuery(full_query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()



def print_examples(training_data, test, num=2):
    """Print up to ``num`` leading examples from each split.

    Slicing is used instead of indexing so that a split holding fewer than
    ``num`` examples (including an empty one) is printed as far as it goes
    rather than raising ``IndexError``.

    Args:
        training_data: Sequence of training examples.
        test: Sequence of test examples.
        num: Maximum number of examples to show per split (default 2).
    """
    print("training_data_example")
    for example in training_data[:num]:
        print(example)

    print()
    print("test_data_example")
    for example in test[:num]:
        print(example)

def getPronouns(gender, val_type="subj"):
    """Return the English pronoun for ``gender`` in the requested form.

    Args:
        gender: Gender label; ``"female"`` and ``"male"`` get gendered
            pronouns, every other value gets the they/their/them set.
        val_type: ``"subj"``, ``"poss"`` or ``"obj"``.

    Returns:
        The matching pronoun, or ``"they"`` when ``val_type`` is not one of
        the three known forms.
    """
    neutral_forms = {"subj": "they", "poss": "their", "obj": "them"}
    gendered_forms = {
        "female": {"subj": "she", "poss": "her", "obj": "her"},
        "male": {"subj": "he", "poss": "his", "obj": "him"},
    }
    forms = gendered_forms.get(gender, neutral_forms)
    return forms.get(val_type, "they")

In [3]:
"""
Start with the only query that does not filter by an entity
--> Genre Info

Join
  --> Award Info by TV name, extract info from genre first then award info
  --> Creator Shows by TV name, extract info from genre info then run creator shows
  --> CREATOR_DIRECTED_OTHER by TV name/show, extract info from genre info then run created directed other
  --> ACTOR_SWITCH_INFOR by TV name/show, extract info from genre info then run function
  -->
"""

class TVQueries(object):
    """SPARQL query templates for the TV domain (Wikidata).

    The attributes are plain strings. Two kinds of placeholder appear in them:

    * ``{entity}`` -- an English label that the query filters on.
    * ``%s`` -- positional slots (entity id after ``wd:``, property id after
      ``p:`` / ``ps:`` / ``wdt:``, and in some templates a leading slot before
      ``SELECT`` and a free-form slot inside the pattern).

    NOTE(review): the SPARQL text contains literal braces, so ``str.format``
    cannot be used to fill ``{entity}``; a plain string replace (after any
    ``%`` substitution) is needed.

    Wikidata identifiers used below (meanings taken from wikidata.org --
    verify before relying on them): Q5398426 television series, Q581714
    animated series, Q21191270 television series episode, P31 instance of,
    P279 subclass of, P136 genre, P166 award received, P585 point in time,
    P170 creator, P1431 executive producer, P21 sex or gender, P57 director,
    P179 part of the series, P577 publication date, P580 start time,
    P582 end time, P161 cast member, P725 voice actor, P453 character role,
    P1113 number of episodes, P345 IMDb ID, P1346 winner.
    """

    # No placeholders. Returns ?tv with two genre labels (?genre, ?genre1).
    # The P136 pattern is matched twice independently, so every show yields
    # one row per ordered pair of its genres, including a genre paired with
    # itself.
    GENRE_INFO = """
        SELECT ?tv ?genre ?genre1 ?genreNode
                WHERE 
                {
                ?TVType wdt:P31 wd:Q5398426.
                ?TVType rdfs:label ?tv.
                FILTER (LANG(?tv) = "en").               

                ?TVType wdt:P136 ?genreNode.
                ?genreNode rdfs:label ?genre.
                  FILTER(LANG(?genre) = "en") . 

                  ?TVType wdt:P136 ?genreNode1.
                ?genreNode1 rdfs:label ?genre1.
                  FILTER(LANG(?genre1) = "en") . 
                } 
        """
    # Placeholder: {entity} (show label). Returns the show's P166 awards and,
    # when the statement carries a P585 qualifier, its ?date.
    # NOTE(review): ?idType is bound by the UNION but neither selected nor
    # filtered, so it appears only to multiply result rows -- confirm intent.
    AWARD_INFO = """
       SELECT  ?tv ?award ?date
        WHERE 
        {


          ?TVType wdt:P31 wd:Q5398426.
          ?TVType rdfs:label ?tv.
          FILTER (LANG(?tv) = "en").
          FILTER (STR(?tv) = "{entity}").


          ?TVType p:P166 ?awardInfo.
          ?awardInfo ps:P166 ?awardNode.

          {?awardNode wdt:P31 ?idType.}
          UNION
          {?awardNode wdt:P31 ?idNode .
           ?idNode wdt:P279 ?idType.}
          UNION
          {?awardNode wdt:P31 ?idNode1 .
           ?idNode1 wdt:P279 ?idNode2.
           ?idNode2 wdt:P279 ?idType.}

          ?awardNode rdfs:label ?award .
          FILTER(LANG(?award) = "en") .
          OPTIONAL{
            ?awardInfo pq:P585 ?date .
          }
            }

    """

    # Placeholder: {entity} (show label). For each P170 creator of that show,
    # returns the shows linked to the creator through P170 or P1431.
    # ?start and ?end sit in the same OPTIONAL, so they are bound only when
    # both P580 and P582 are present.
    CREATOR_SHOWS = """
                    SELECT ?tv ?creator ?creatorNode ?gender ?show ?showNode ?start ?end
                   WHERE
                   {
                     ?TVType wdt:P31 wd:Q5398426.
                     ?TVType rdfs:label ?tv.
                     FILTER (LANG(?tv) = "en").
                     FILTER (STR(?tv) = "{entity}").
                      
                     ?TVType wdt:P170 ?creatorNode.
                     ?creatorNode rdfs:label ?creator .
                     FILTER(LANG(?creator) = "en") .

                     OPTIONAL{
                       ?creatorNode wdt:P21 ?genderNode.
                       ?genderNode rdfs:label ?gender .
                     FILTER(LANG(?gender) = "en") .}

                      {?showNode wdt:P170 ?creatorNode.}
                       UNION
                     {
                       ?showNode wdt:P1431 ?creatorNode.}

                     ?showNode rdfs:label ?show .
                     ?showNode wdt:P31/wdt:P279* wd:Q5398426.
                      FILTER(LANG(?show) = "en") .

                     OPTIONAL{
                       ?showNode wdt:P580 ?start.
                       ?showNode wdt:P582 ?end.
                       } 
                   } """

    # Placeholders: %s (before SELECT -- presumably a prefix block, TODO
    # confirm), %s (item id after "wd:"), {entity} (creator label).
    # Episodes directed (P57) by the item's creator that belong (P179) to a
    # show other than the item.
    CREATOR_DIRECTED_OTHER = """
                   %s

                   SELECT ?creator ?creatorNode ?gender ?show ?showNode ?episode (SAMPLE(?date) as ?date)
                   WHERE
                   {
                     BIND(wd:%s AS ?item).

                     ?item wdt:P170 ?creatorNode.
                     ?creatorNode rdfs:label ?creator .
                     FILTER(LANG(?creator) = "en") .
                     FILTER (STR(?creator) = "{entity}").
                     
                     OPTIONAL{
                       ?creatorNode wdt:P21 ?genderNode.
                       ?genderNode rdfs:label ?gender .
                     FILTER(LANG(?gender) = "en") .}

                       ?episodeNode wdt:P57 ?creatorNode.
                     ?episodeNode wdt:P179 ?showNode.
                     FILTER(?showNode != ?item).
                     ?episodeNode rdfs:label ?episode .
                     ?episodeNode wdt:P31/wdt:P279* wd:Q21191270.
                      FILTER(LANG(?episode) = "en") .
                     ?showNode rdfs:label ?show .
                     #?showNode wdt:P31/wdt:P279* wd:Q5398426.
                      FILTER(LANG(?show) = "en") .

                      OPTIONAL{
                       ?episodeNode wdt:P577 ?date.
                       }
                   } GROUP BY ?creator ?creatorNode ?gender ?show ?showNode ?episode """

    # Same placeholders as CREATOR_DIRECTED_OTHER, but restricted to episodes
    # that belong (P179) to the item itself.
    creator_directed_same = """
                   %s

                   SELECT ?creator ?creatorNode ?gender ?episode ?episodeNode (SAMPLE(?date) as ?date)
                   WHERE
                   {
                     BIND(wd:%s AS ?item).

                     ?item wdt:P170 ?creatorNode.
                     ?creatorNode rdfs:label ?creator .
                     FILTER(LANG(?creator) = "en") .
                     FILTER (STR(?creator) = "{entity}").

                     OPTIONAL{
                       ?creatorNode wdt:P21 ?genderNode.
                       ?genderNode rdfs:label ?gender .
                     FILTER(LANG(?gender) = "en") .}

                       ?episodeNode wdt:P57 ?creatorNode.
                     ?episodeNode wdt:P179 ?item.
                     ?episodeNode rdfs:label ?episode .
                     ?episodeNode wdt:P31/wdt:P279* wd:Q21191270.
                      FILTER(LANG(?episode) = "en") .

                     OPTIONAL{
                       ?episodeNode wdt:P577 ?date.
                       }
                   } GROUP BY ?creator ?creatorNode ?gender ?episode ?episodeNode """

    # Six %s slots, in order: text before SELECT, item id, property after
    # "p:", property after "ps:", property after "wdt:", and a free-form line
    # inside the pattern; plus {entity} (label of the other show).
    # ?count counts the P161/P725 edges pointing at the cast member together
    # with the cast member's P166 edges; rows are ordered by it.
    ACTOR_SWITCH_INFOR = """
                   %s

                   SELECT ?cast ?castNode ?gender ?show ?showNode ?numEps (COUNT(?oedge) as ?count)
                   WHERE
                   {
                   BIND(wd:%s AS ?item).

                     ?item p:%s ?castInfo.
                         ?castInfo ps:%s ?castNode.
                         ?castNode rdfs:label ?cast .
                         FILTER(LANG(?cast) = "en") .
                         

                         ?castInfo pq:P453 ?roleNode .

                     OPTIONAL{
                       ?castNode wdt:P21 ?genderNode.
                       ?genderNode rdfs:label ?gender .
                     FILTER(LANG(?gender) = "en") .}

                      ?showNode wdt:%s ?castNode.
                      {?showNode wdt:P31 wd:Q5398426.}
                      UNION
                      {?showNode wdt:P31 wd:Q581714.}

                      ?showNode rdfs:label ?show .
                      %s
                      FILTER(LANG(?show) = "en") .
                      FILTER (STR(?show) = "{entity}").

                     {?otheri ?oedge ?castNode .
                     FILTER ( ?oedge in (wdt:P161, wdt:P725) )}
                       UNION
                     {?castNode ?oedge ?otherj .
                     FILTER ( ?oedge in (wdt:P166) )}

                     ?showNode wdt:P1113 ?numEps.

                 } GROUP BY ?cast ?castNode ?gender ?show ?showNode ?numEps ORDER BY DESC(?count)
                 DESC(?numEps)"""

    # Variant of ACTOR_SWITCH_INFOR with five %s slots (text before SELECT,
    # item id, two properties after "wdt:", free-form line). It links the cast
    # member with a direct wdt: edge instead of a p:/ps: statement, does not
    # require a P453 qualifier, and additionally returns ?imdb (P345).
    get_actor_switch_info_complete = """
                   %s

                   SELECT ?cast ?castNode ?gender ?show ?showNode ?imdb ?numEps (COUNT(?oedge) as ?count)
                   WHERE
                   {
                   BIND(wd:%s AS ?item).

                   ?item wdt:%s ?castNode.
                     ?castNode rdfs:label ?cast .
                     FILTER(LANG(?cast) = "en") .
                     
                     
                     OPTIONAL{
                       ?castNode wdt:P21 ?genderNode.
                       ?genderNode rdfs:label ?gender .
                     FILTER(LANG(?gender) = "en") .}

                      ?showNode wdt:%s ?castNode.
                      {?showNode wdt:P31 wd:Q5398426.}
                      UNION
                      {?showNode wdt:P31 wd:Q581714.}

                      ?showNode rdfs:label ?show .
                      %s
                      FILTER(LANG(?show) = "en") .
                      FILTER (STR(?show) = "{entity}").
                      ?showNode wdt:P345 ?imdb.
                      
                      
                     {?otheri ?oedge ?castNode .
                     FILTER ( ?oedge in (wdt:P161, wdt:P725) )}
                       UNION
                     {?castNode ?oedge ?otherj .
                     FILTER ( ?oedge in (wdt:P166) )}

                     ?showNode wdt:P1113 ?numEps.

                 } GROUP BY ?cast ?castNode ?gender ?show ?showNode ?imdb ?numEps ORDER BY DESC(?count)
                 DESC(?numEps)"""

    # Three %s slots (item id, property after "p:", property after "ps:") plus
    # {entity} (cast member label). Returns the cast member's role (P453
    # qualifier), optional gender, and optionally an award of the item whose
    # P1346 qualifier is that cast member.
    CHARACTER_INFO = """


                    SELECT ?cast ?castNode ?role ?roleNode ?gender ?award ?awardNode
                    WHERE
                    {
                      BIND(wd:%s AS ?item).

                      ?item p:%s ?castInfo.
                      ?castInfo ps:%s ?castNode.
                      ?castNode rdfs:label ?cast .
                      FILTER(LANG(?cast) = "en") .
                      FILTER (STR(?cast) = "{entity}").
                      
                      ?castInfo pq:P453 ?roleNode .
                      ?roleNode rdfs:label ?role .
                      FILTER(LANG(?role) = "en") .

                      OPTIONAL{
                        ?castNode wdt:P21 ?genderNode.
                        ?genderNode rdfs:label ?gender .
                        FILTER(LANG(?gender) = "en") .}

                      OPTIONAL{
                        ?item p:P166 ?awardInfo.
                        ?awardInfo pq:P1346 ?castNode.
                        ?awardInfo ps:P166 ?awardNode.
                        ?awardNode rdfs:label ?award .
                        FILTER(LANG(?award) = "en") .
                        }

                    } """

    # Same slots as CHARACTER_INFO. Instead of awards it returns ?count (the
    # P161/P725 edges pointing at the cast member plus the cast member's P166
    # edges) and keeps only the five rows with the highest count.
    character_system_info = """


                    SELECT ?cast ?castNode ?role ?roleNode ?gender (COUNT(?oedge) as ?count)
                    WHERE
                    {
                      BIND(wd:%s AS ?item).

                      ?item p:%s ?castInfo.
                      ?castInfo ps:%s ?castNode.
                      ?castNode rdfs:label ?cast .
                      FILTER(LANG(?cast) = "en") .
                      FILTER (STR(?cast) = "{entity}").
                      
                      ?castInfo pq:P453 ?roleNode .
                      ?roleNode rdfs:label ?role .
                      FILTER(LANG(?role) = "en") .

                      OPTIONAL{
                        ?castNode wdt:P21 ?genderNode.
                        ?genderNode rdfs:label ?gender .
                        FILTER(LANG(?gender) = "en") .}

                      {?otheri ?oedge ?castNode .
                      FILTER ( ?oedge in (wdt:P161, wdt:P725) )}
                           UNION
                       {?castNode ?oedge ?otherj .
                       FILTER ( ?oedge in (wdt:P166) )}


                    } GROUP BY ?cast ?castNode ?role ?roleNode ?gender ORDER BY DESC(?count) LIMIT 5
                                """

In [4]:


#import helper_functions as hf


#from queries.tv_queries import TVQueries

class TVFunctions(object):
    """Build train/test dialogue examples for the TV domain from Wikidata.

    Each public builder sends one ``TVQueries`` template through
    ``query_sparql``, turns every result row into an example, shuffles the
    examples and returns ``(train, test)``: ``test`` holds the first
    ``test_num`` shuffled examples and ``train`` the remainder.

    Apart from ``get_character_info`` every example is the list
    ``[triples, responses, templates, label, "tv"]``.

    All builders are static methods, so they can be called on the class
    (``TVFunctions.get_all_tv_data()``) or on an instance.

    NOTE(review): ``TVResponses`` is referenced by several builders but is not
    defined or imported in the visible notebook cells; it must be in scope
    before those builders are called.
    """

    # Placeholder that the TVQueries templates use for the entity label.
    _ENTITY_PLACEHOLDER = "{entity}"

    @staticmethod
    def _bind_entity(query, entity):
        """Return ``query`` with the ``{entity}`` placeholder filled in.

        ``str.format`` cannot be used because the SPARQL text contains literal
        braces, so the placeholder is substituted with ``str.replace``. The
        label is escaped so that quotes, backslashes or line breaks in it
        cannot terminate the SPARQL string literal it is placed in. When
        ``entity`` is ``None`` the query is returned unchanged.
        """
        if entity is None:
            return query
        escaped = (
            str(entity)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )
        return query.replace(TVFunctions._ENTITY_PLACEHOLDER, escaped)

    @staticmethod
    def _split_train_test(all_data):
        """Shuffle ``all_data`` in place and return ``(train, test)``."""
        random.shuffle(all_data)
        return all_data[test_num:], all_data[:test_num]

    @staticmethod
    def _as_show_genre(genre):
        """Append " show" to a genre label whose last word is "television"."""
        if genre.split(" ")[-1] == "television":
            return genre + " show"
        return genre

    @staticmethod
    def _plural_suffix(label):
        """Return "" when ``label`` already ends in "s", otherwise "s"."""
        return "" if label.endswith("s") else "s"

    @staticmethod
    def _combine(init_templates, question_templates, init_fields, question_fields):
        """Format every init/question template pair.

        Returns ``(responses, templates)``: the formatted ``init + question``
        texts and, index-aligned with them, the unformatted
        ``init + question`` template strings.
        """
        responses = []
        templates = []
        for init_template in init_templates:
            init_text = init_template.format(**init_fields)
            for question_template in question_templates:
                responses.append(init_text + question_template.format(**question_fields))
                templates.append(init_template + question_template)
        return responses, templates

    @staticmethod
    def get_all_tv_data():
        """Build genre examples for every show returned by ``GENRE_INFO``.

        A row whose two genre labels differ produces a two-genre example
        (``init_2`` / ``question_2`` templates); a row whose labels are equal,
        or that lacks the second label, produces a single-genre example
        (``init_1`` / ``question_1`` templates).

        Fixes relative to the previous version:
        * the two-genre test read ``genre2`` before assigning it and could
          never fall through to the single-genre branch;
        * the single-genre branch indexed the template lists with loop
          variables that were undefined (or stale) in that branch;
        * an award query was executed and its result discarded.

        TODO: joining award information per show is not implemented here; use
        ``get_tv_award_data`` for award examples.

        Returns:
            ``(train, test)`` lists of
            ``[triples, responses, templates, "genre", "tv"]``.
        """
        results = query_sparql(TVQueries.GENRE_INFO)
        genre_templates = TVResponses.GENRE_SYSTEM_FIRST_RESPONSE
        all_data = []

        for data in results["results"]["bindings"]:
            show = data["tv"]["value"]
            genre1 = TVFunctions._as_show_genre(data["genre"]["value"])
            genre2 = None
            if "genre1" in data:
                genre2 = TVFunctions._as_show_genre(data["genre1"]["value"])

            if genre2 is not None and genre2 != genre1:
                triples = [(show, "genre", genre1), (show, "genre", genre2)]
                all_responses, templates = TVFunctions._combine(
                    genre_templates["init_2"],
                    genre_templates["question_2"],
                    {"show": show, "genre1": genre1, "genre2": genre2},
                    {
                        "genre1": genre1,
                        "genre2": genre2,
                        "s1": TVFunctions._plural_suffix(genre1),
                        "s2": TVFunctions._plural_suffix(genre2),
                    },
                )
            else:
                triples = [(show, "genre", genre1)]
                all_responses, templates = TVFunctions._combine(
                    genre_templates["init_1"],
                    genre_templates["question_1"],
                    {"show": show, "genre": genre1},
                    {"genre": genre1, "s": TVFunctions._plural_suffix(genre1)},
                )
            all_data.append([triples, all_responses, templates, "genre", "tv"])

        return TVFunctions._split_train_test(all_data)

    @staticmethod
    def get_tv_award_data(entity=None):
        """Build award examples from ``AWARD_INFO``.

        Args:
            entity: English label of the show to query. When ``None`` the
                template is sent with its placeholder unfilled, exactly as
                before this parameter existed.

        Returns:
            ``(train, test)`` lists of
            ``[triples, [], [], "award", "tv"]``. The triples hold the award
            and, when the row carries a date, the text before the first "-"
            of that date. Responses and templates stay empty until the
            ``TVResponses.AWARD_SYSTEM_FIRST_RESPONSE`` templates
            (``intro_year`` / ``intro_noyear`` / ``question``) are wired in.
        """
        results = query_sparql(TVFunctions._bind_entity(TVQueries.AWARD_INFO, entity))
        all_data = []

        for data in results["results"]["bindings"]:
            tv = data["tv"]["value"]
            award_label = data["award"]["value"]
            triple = [(tv, "award received", award_label)]
            if "date" in data:
                year = data["date"]["value"].split("-")[0]
                triple.append((tv, "date", year))
            all_data.append([triple, [], [], "award", "tv"])

        return TVFunctions._split_train_test(all_data)

    @staticmethod
    def get_tv_creator_show_data(entity=None):
        """Build "show was created by ..." examples from ``CREATOR_SHOWS``.

        Args:
            entity: English label of the show to query; ``None`` sends the
                template with its placeholder unfilled (previous behaviour).

        Returns:
            ``(train, test)`` lists of
            ``[triples, responses, templates, "show_creator", "tv"]`` where
            the responses are the ``other_shows_creator`` templates formatted
            with the show, the creator and the creator's possessive pronoun.
        """
        results = query_sparql(TVFunctions._bind_entity(TVQueries.CREATOR_SHOWS, entity))
        intro_templates = TVResponses.CREATOR_USER_FIRST_RESPONSE["other_shows_creator"]
        all_data = []

        for data in results["results"]["bindings"]:
            show = data["tv"]["value"]
            creator = data["creator"]["value"]
            gender = data["gender"]["value"] if "gender" in data else ""
            poss = getPronouns(gender, val_type="poss")

            responses = [
                template.format(show=show, creator=creator, poss=poss)
                for template in intro_templates
            ]
            triple = [(show, "creator", creator)]
            all_data.append([triple, responses, list(intro_templates), "show_creator", "tv"])

        return TVFunctions._split_train_test(all_data)

    @staticmethod
    def creator_show_data_2(entity=None):
        """Build follow-up examples about the creator's other shows.

        Uses the ``?show`` column of ``CREATOR_SHOWS`` (a show linked to the
        creator), not the queried show itself. Rows with a start date use the
        ``creator/year`` templates and add a "start time" triple; rows without
        one use the ``creator/no_year`` templates.

        Args:
            entity: English label of the show to query; ``None`` sends the
                template with its placeholder unfilled (previous behaviour).

        Returns:
            ``(train, test)`` lists of
            ``[triples, responses, templates, "show_creator", "tv"]``. Each
            response is a formatted intro followed by one ``question_show``
            text; ``templates`` holds the matching unformatted intro.
        """
        results = query_sparql(TVFunctions._bind_entity(TVQueries.CREATOR_SHOWS, entity))
        second_response = TVResponses.CREATOR_USER_SECOND_RESPONSE
        all_data = []

        for data in results["results"]["bindings"]:
            creator = data["creator"]["value"]
            show = data["show"]["value"]
            start = data["start"]["value"].split("-")[0] if "start" in data else ""

            fields = {"show": show, "creator": creator}
            triple = [(show, "creator", creator)]
            if start == "":
                intro_templates = second_response["creator"]["no_year"]
            else:
                intro_templates = second_response["creator"]["year"]
                fields["start"] = start
                triple.append((show, "start time", start))

            responses = []
            templates = []
            for intro_template in intro_templates:
                intro_text = intro_template.format(**fields)
                for question in second_response["question_show"]:
                    responses.append(intro_text + question)
                    templates.append(intro_template)

            all_data.append([triple, responses, templates, "show_creator", "tv"])

        return TVFunctions._split_train_test(all_data)

    @staticmethod
    def get_character_info():
        """Build character examples from ``CHARACTER_INFO`` (not functional yet).

        NOTE(review): this builder cannot work as written and is left
        unchanged apart from the shared train/test split, because the right
        fix depends on template fields that are not visible here:
        * ``CHARACTER_INFO`` still contains unfilled ``%s`` and ``{entity}``
          placeholders when it is sent;
        * the loop reads ``musicianLabel`` / ``awardLabel`` / ``date``, which
          that query does not select (it selects ``cast``, ``role``,
          ``gender``, ``award``) -- this looks copied from another domain;
        * ``templates`` is reset to ``[]`` inside the loop, so ``responses``
          is always empty.

        Returns:
            ``(train, test)`` lists of ``[triple, responses]``.
        """
        results = query_sparql(TVQueries.CHARACTER_INFO)
        templates = TVResponses.CHARACTER_SYSTEM_FIRST_RESPONSE
        templates_char_user_sec = TVResponses.CHARACTER_USER_SECOND_RESPONSE

        templates_user_first_response = TVResponses.AWARD_USER_FIRST_RESPONSE
        all_data = []
        label = "creator_show"
        for data in results["results"]["bindings"]:
            musician = data['musicianLabel']["value"]
            awardlabel = data['awardLabel']["value"]
            dateLabel = data['date']["value"]
            triple = (musician, "label", label)
            templates = []
            responses = []
            for template in templates:
                responses.append(template.format(
                    reclab=label,
                    new_mus=musician).strip())
            all_data.append([triple, responses])

        return TVFunctions._split_train_test(all_data)

In [5]:
import pandas as pd
import random
import csv
from collections import defaultdict

In [6]:
# NOTE(review): this binds the function object itself; no query runs until it
# is called, e.g. ``tv_functions()``.
tv_functions = TVFunctions.get_all_tv_data

In [None]:
import pandas as pd
import random
import csv
from collections import defaultdict

#from queries.tv_queries import TVQueries
#from tv_functions import TVFunctions

# Step 1: Load the dialogue-act (DA) mapping for the TV domain.
# NOTE(review): absolute path -- presumably a mounted data directory; confirm
# it exists wherever this notebook runs.
da_mapping = pd.read_csv("/kg-da-mapping/kg-da-mapping - TV.csv")


# Step 2: Build the pool of TV data that the DA triples will be drawn from.
# TVFunctions.get_all_tv_data runs the queries and returns (train, test).
# NOTE(review): this binds the function object without calling it; call
# tv_functions() to actually run the queries.
tv_functions = TVFunctions.get_all_tv_data

# Step 3: Create DA triples:
#   1. Pick a random number between the min and max for the given DA.
#   2. The name will be the show, so create a filter function that grabs those
#      specific entities from the dictionary and adds them to the pseudo MR.
#   3. Based on the random number from 1, choose the rest of the attributes
#      (quality will need to be confirmed with different combinations).
#   4. Create the pseudo MR to pass.
#   5. Create a file containing: domain, da, pseudo mr, tst prompt. Example:
#        tv,
#        inform,
#        The Hard Times of RJ Berger  genre  American television sitcom. The Hard Times of RJ Berger  genre  LGBTI+ related TV series
#        Here is a text: "The Hard Times of RJ Berger  genre  American television sitcom  The Hard Times of RJ Berger  genre  LGBTI+ related TV series". Here is a rewrite of the text, which is a verify attribute dialogue act: "
#
# Pseudo code (previously an indented bare string, which made the whole cell
# fail to compile with "IndentationError: unexpected indent"):
#   for i in da_mapping:
#       (loop body not written yet)