In [None]:
# Mount Google Drive at /content/drive; every Excel path used below lives under
# /content/drive/MyDrive. This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Load the submissions workbook from Drive. The rendered frame below shows the
# columns filename, content and transparency_text.
excel_filepath = "/content/drive/MyDrive/submission_content_transparency.xlsx"
df = pd.read_excel(excel_filepath)

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,filename,content,transparency_text
0,Australian Federal Police.txt,21 August 2023\nAustralian Federal\nPolice sub...,"transparency, accountability, fairness, privac..."
1,Office of the Australian Information Commissio...,"Department of Industry, Science and Resources\...",public trust and confidence in AI.4 The OAIC h...
2,Law Council of Australia.txt,Safe and responsible AI in\nAustralia\nDepartm...,transparency.....................................
3,Digital Industry Group Inc (DIGI).txt,"Department of Industry, Science and Resources,...",transparencyreport.google.com/youtube-policy/r...
4,Department of Health and Aged Care.txt,Department of Health and Aged Care\nSafe and R...,transparency of AI in the delivery of health c...
...,...,...,...
242,OOP.txt,sine-conic fractals,
243,Complexico.txt,Supporting responsible AI: Complexico's submis...,"transparency, accountability, and trust in the..."
244,Lilin Australia.txt,No! The definitions do not incorporate devices...,
245,IdeaSpies.txt,IdeaSpies is an open innovation platform shari...,


In [None]:
# prompt: Create another df where the filename includes one of the following words: Australian Federal Police, UNSW.ai, Woolworths Group, IBM, KPMG, Meta, Department of Health and Aged Care, Canva, Microsoft

import re

# Organisation names to look for (case-insensitively) inside `filename`.
# "UNSW AI" is listed explicitly because the file is named "UNSW AI Institute.txt";
# previously it was only picked up because the unescaped "." in "UNSW.ai" acted
# as a regex wildcard.
keywords = ["Australian Federal Police", "UNSW.ai", "UNSW AI", "Woolworths Group", "IBM", "KPMG", "Meta", "Department of Health and Aged Care", "Canva", "Microsoft"]

# str.contains treats its pattern as a regular expression by default, so escape
# every keyword to make the match a literal substring match.
keyword_pattern = '|'.join(re.escape(keyword) for keyword in keywords)

# Assuming 'df' is already loaded as in the previous code snippet
# na=False: rows with a missing filename are excluded instead of propagating NaN.
df_filtered = df[df['filename'].str.contains(keyword_pattern, case=False, na=False)]

df_filtered


Unnamed: 0,filename,content,transparency_text
0,Australian Federal Police.txt,21 August 2023\nAustralian Federal\nPolice sub...,"transparency, accountability, fairness, privac..."
4,Department of Health and Aged Care.txt,Department of Health and Aged Care\nSafe and R...,transparency of AI in the delivery of health c...
21,Woolworths Group.txt,11 August 2023\nVia email: DigitalEconomy@indu...,"transparency of outputs, data ethics, privacy ..."
28,Meta.txt,Meta’s Submission on\nSafe & Responsible AI in...,"transparency, openness and responsible innovat..."
77,Microsoft.txt,Microsoft submission on Safe and Responsible A...,transparency-led approach and addressing known...
85,KPMG Australia.txt,Safe and responsible AI in\nAustralia\nKPMG su...,"transparency and explainability, contestabilit..."
122,UNSW AI Institute.txt,UNSW AI Institute\nai.director@unsw.edu.au\n25...,Transparency\nAI will not be deployed responsi...
154,IBM Australia.txt,Consultation Team\nSafe and Responsible AI in ...,transparency that make clear the role of AI is...


In [None]:
def get_completion(system, user, format, model="gpt-4o-mini"):
    """Send one instruction/prompt pair to the chat-completions API and return the reply text.

    Args:
        system: Instruction text, sent with the "developer" role.
        user: Prompt text, sent with the "user" role.
        format: Passed through unchanged as ``response_format``. The name
            shadows the builtin ``format``; it is kept so existing positional
            and keyword callers continue to work.
        model: Model identifier forwarded to the API.

    Returns:
        ``content`` of the first choice in the response.

    Warns:
        RuntimeWarning: when the first choice reports ``finish_reason == "length"``,
            i.e. the reply was cut off at ``max_tokens``. The (partial) content is
            still returned, but callers that feed it to ``json.loads`` should expect
            it to be incomplete.

    Note:
        Uses a module-level ``client`` that is not defined in this cell; it must
        already exist in the kernel before this function is called.
    """
    import warnings

    messages = [
        {"role": "developer", "content": system},
        {"role": "user", "content": user},
    ]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        response_format=format,
        temperature=1,
        max_tokens=2000,
        top_p=0.8,
        frequency_penalty=0.3,
    )
    # Kept from the original: the raw response object is echoed into the cell output.
    print(response)

    first_choice = response.choices[0]
    if getattr(first_choice, "finish_reason", None) == "length":
        # Surface truncation here, where the cause is known, instead of as an
        # unexplained JSON parsing error in a later cell.
        warnings.warn(
            "Completion stopped because max_tokens was reached; the returned content is truncated.",
            RuntimeWarning,
            stacklevel=2,
        )
    return first_choice.message.content

In [None]:
#Step 2

# Earlier system prompt; none of the cells visible in this notebook pass it to
# get_completion.
system_text_old = """
    You are a professional sociologist. When you read text you recognize themes and relationships.
    When asked to compare concepts you are able to recognize connections that are abstract or conceptual.
    """
# System prompt passed as the first argument of every get_completion call below.
# It sets the researcher persona, the research questions, and the focus on
# question 9 (AI transparency) of the consultation.
system_text = """
    You are an information systems researcher. You are researching organizational perspectives on mandating AI transparency. The main research questions are
    to understand how organizations understand AI transparency, what their views are about mandating it, where and when they think transparency is most critical to mitigate risks,
    and what are their plans for responding the requirements. In order to answer these questions, you are doing thematic analysis of public consultation submissions
    to the Australian government's "Safe and responsible AI discussion paper" in 2023. You only focus on submissions coming from organizations, including industry associations, tech companies,
    consulting firms, government agencies, and academic institutions. The public consultation asks for responses on 20 questions, and the question you focus on is question 9 about AI transparency.
    """
# Structured-output schema for the per-document coding step.
# The prompt asks for a list of codes under the key "Themes" and the parsing
# code reads data["Themes"], so the schema describes exactly that shape: an
# object with one "Themes" array whose items carry code_name, description and
# quote. "strict" makes the API enforce the schema; strict mode requires every
# property to be listed in "required" and additionalProperties to be False.
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "Codes",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "Themes": {
                    "description": "All codes identified in the text",
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "code_name": {
                                "description": "Name of code",
                                "type": "string"
                            },
                            "description": {
                                "description": "Description of code",
                                "type": "string"
                            },
                            "quote": {
                                "description": "Quote from text",
                                "type": "string"
                            }
                        },
                        "required": ["code_name", "description", "quote"],
                        "additionalProperties": False
                    }
                }
            },
            "required": ["Themes"],
            "additionalProperties": False
        }
    }
}

In [None]:
import json

# Column layout of the per-document code table.
theme_columns = ["file_name", "code_name", "description", "quote"]

# One dict per extracted code. The frame is built once after the loop instead
# of concatenating onto an (initially empty) DataFrame on every iteration.
theme_records = []

for index, row in df_filtered.iterrows():
    text = row['transparency_text']
    # Skip submissions with no extracted transparency text; otherwise the
    # literal "nan" would be sent to the model as the document.
    if pd.isna(text) or not str(text).strip():
        continue
    user_text = f"""
        Given the following text:
        \"\"\"\n{text}\n\"\"\"

        Identify all themes in the text, provide a name for each theme in no more than 5 words,
        a condensed description of the theme, and a quote from the text that supports the theme.

        Format the response in a JSON format with "code_name", "description", and "quote" under the key "Themes".
    """
    response = get_completion(system_text, user_text, response_format)
    data = json.loads(response)
    # Tag every code with the submission it came from.
    for theme in data["Themes"]:
        theme_records.append({**theme, "file_name": row['filename']})

# columns= fixes the column order and yields an empty, correctly-shaped frame
# when no codes were collected.
df_theme = pd.DataFrame(theme_records, columns=theme_columns)


ChatCompletion(id='chatcmpl-BHfP5SOB0kgZLnBlXlmKdpoZaMua4', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "Themes": [\n    {\n      "code_name": "Importance of Transparency",\n      "description": "Transparency is crucial for trust and ethical AI use in policing.",\n      "quote": "the AFP will prioritise community engagement and transparency in AI development, acknowledging that successful policing within democratic societies is based on trust to apply powers fairly and without fear or favour."\n    },\n    {\n      "code_name": "Cautious Approach to AI",\n      "description": "The AFP adopts a cautious strategy regarding AI deployment to protect the community.",\n      "quote": "To date the AFP has taken a cautious approach to harnessing the potential of AI despite its early adoption and use by those with intent on harming Australians."\n    },\n    {\n      "code_name": "Need for Robust Governance",\n      "description": "E

In [None]:
# Render the code table built above (file_name, code_name, description, quote).
df_theme

Unnamed: 0,file_name,code_name,description,quote
0,Australian Federal Police.txt,Importance of Transparency,Transparency is crucial for trust and ethical ...,the AFP will prioritise community engagement a...
1,Australian Federal Police.txt,Cautious Approach to AI,The AFP adopts a cautious strategy regarding A...,To date the AFP has taken a cautious approach ...
2,Australian Federal Police.txt,Need for Robust Governance,Effective governance frameworks are essential ...,The AFP is prioritising effective governance t...
3,Australian Federal Police.txt,Ethical Considerations in AI,AI use raises ethical questions regarding priv...,The dual-use nature of AI presents significant...
4,Australian Federal Police.txt,Stakeholder Engagement,Collaboration with stakeholders is necessary f...,The key is to proactively undertake due dilige...
...,...,...,...,...
66,IBM Australia.txt,Explainability of AI Systems,AI systems must be able to explain decisions.,...should be able to explain and contextualize...
67,IBM Australia.txt,AI FactSheets Concept,'FactSheets' help communicate AI information.,...IBM has adopted the use of AI Factsheets (a...
68,IBM Australia.txt,Global Coordination for Standards,Encouraging international best practices for t...,...Strengthen mechanisms for global coordinati...
69,IBM Australia.txt,Differentiation in Sector Regulation,'Different rules for different risks' applies ...,...it may make sense to differentiate obligati...


In [None]:
# Write the code table to Drive. The fallback message promises an "empty or
# None" check, so emptiness is tested as well as None.
if df_theme is not None and not df_theme.empty:
    excel_filepath = "/content/drive/MyDrive/codes_8_docs_2.xlsx"  # Replace with desired path
    df_theme.to_excel(excel_filepath, index=False)  # Set index=False to avoid writing row indices
    print(f"DataFrame exported to: {excel_filepath}")
else:
    print("DataFrame 'df_theme' is empty or None. Cannot export.")

DataFrame exported to: /content/drive/MyDrive/codes_8_docs_2.xlsx


In [None]:
import pandas as pd

# Re-load the exported code table from Drive, rebinding both excel_filepath and
# df_theme, so the following steps can run from the saved file.
excel_filepath = "/content/drive/MyDrive/codes_8_docs_2.xlsx"
df_theme = pd.read_excel(excel_filepath)

In [None]:
#Step 3

# Render the code_name column as a single plain-text block with the index
# column omitted. This string is also interpolated into the Step 4 and Step 5
# prompts further down.
formatted_codes = df_theme['code_name'].to_string(index=False)

# Prompt asking the model to group the codes into named themes and to answer as
# JSON under the key "Themes".
user_text_2 = f"""
Consider these topics:
\"\"\"\n{formatted_codes}\n\"\"\"

Determine how all the topics in the list of topics can be grouped together.
Topics can be in more than one group. Provide a name and description for each group, followed by all the topics in the group.

Format the response in a JSON format with "theme_name", "description", and "codes" under the key "Themes".
"""
# Structured-output schema for the grouping steps.
# The prompt asks for groups under the key "Themes" and the parsing code reads
# data["Themes"], so the schema is an object holding one "Themes" array.
# "codes" is declared as an array of strings: without an enforced schema the
# recorded runs returned it sometimes as a list and sometimes as one
# comma-separated string. "strict" makes the API enforce the schema; strict
# mode requires every property in "required" and additionalProperties False.
response_format_2 = {
    "type": "json_schema",
    "json_schema": {
        "name": "Themes",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "Themes": {
                    "description": "All theme groups",
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "theme_name": {
                                "description": "Name of theme group",
                                "type": "string"
                            },
                            "description": {
                                "description": "Description of theme group",
                                "type": "string"
                            },
                            "codes": {
                                "description": "Topics",
                                "type": "array",
                                "items": {"type": "string"}
                            }
                        },
                        "required": ["theme_name", "description", "codes"],
                        "additionalProperties": False
                    }
                }
            },
            "required": ["Themes"],
            "additionalProperties": False
        }
    }
}

In [None]:
# Grouping run 1 of 3: the same prompt and schema are sent three times to get
# alternative groupings. Each response is parsed into one row per theme.
result_2 = get_completion(system_text, user_text_2, response_format_2)
data2 = json.loads(result_2)
df_theme2 = pd.DataFrame(data2["Themes"])
df_theme2

ChatCompletion(id='chatcmpl-BHfSYpOx6vIkf5qJHOspaAwvCyO9v', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "Themes": [\n    {\n      "theme_name": "AI Transparency and Accountability",\n      "description": "This group focuses on the importance of transparency in AI systems and the need for accountability mechanisms to ensure ethical use.",\n      "codes": [\n        "Importance of Transparency",\n        "AI Transparency Importance",\n        "Ongoing Accountability",\n        "Risk-Based Transparency",\n        "Importance of Disclosure",\n        "Explainability of AI Systems",\n        "Algorithmic Accountability Needs"\n      ]\n    },\n    {\n      "theme_name": "Governance and Regulation Frameworks",\n      "description": "This theme encompasses the need for robust governance structures and regulatory frameworks to manage AI technologies effectively.",\n      "codes": [\n        "Need for Robust Governance",\n        "Re

Unnamed: 0,theme_name,description,codes
0,AI Transparency and Accountability,This group focuses on the importance of transp...,"[Importance of Transparency, AI Transparency I..."
1,Governance and Regulation Frameworks,This theme encompasses the need for robust gov...,"[Need for Robust Governance, Regulatory Framew..."
2,Ethical Considerations and Social Impact,This group addresses ethical considerations in...,"[Ethical Considerations in AI, Inclusivity in ..."
3,Stakeholder Engagement and Collaboration,This theme highlights the importance of engagi...,"[Stakeholder Engagement, International Collabo..."
4,Education and Public Awareness,This group emphasizes the need for education r...,"[Training for Workforce, Consumer Education on..."
5,'Risk-Based' Approach to AI Management,'Risk-based' approaches advocate for tailored ...,"[Risk-Based Approach Advocacy, 'High-Risk' AI ..."
6,'Transparency Tools' for Users and Public Trus...,'Transparency tools' are mechanisms designed t...,"[...Transparency Tools for Users..., ...Public..."


In [None]:
# Grouping run 2 of 3: identical call to run 1, giving an alternative grouping.
result_3 = get_completion(system_text, user_text_2, response_format_2)
data3 = json.loads(result_3)
df_theme3 = pd.DataFrame(data3["Themes"])
df_theme3

ChatCompletion(id='chatcmpl-BHfT6URvcqbepM5YVp4sYB5xwnuMy', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "Themes": [\n    {\n      "theme_name": "Importance of Transparency",\n      "description": "This group highlights the critical role of transparency in AI systems, focusing on its necessity for public trust, ethical considerations, and risk mitigation.",\n      "codes": [\n        "Importance of Transparency",\n        "AI Transparency Importance",\n        "Risk Mitigation through Transparency",\n        "Importance of Disclosure",\n        "Explainability of AI Systems"\n      ]\n    },\n    {\n      "theme_name": "Governance and Regulation",\n      "description": "This group emphasizes the need for robust governance structures and regulatory frameworks to manage AI effectively, ensuring ethical practices and accountability.",\n      "codes": [\n        "Need for Robust Governance",\n        "Regulatory Frameworks Develo

Unnamed: 0,theme_name,description,codes
0,Importance of Transparency,This group highlights the critical role of tra...,"[Importance of Transparency, AI Transparency I..."
1,Governance and Regulation,This group emphasizes the need for robust gove...,"[Need for Robust Governance, Regulatory Framew..."
2,Ethical Considerations in AI,This group covers the ethical implications of ...,"[Ethical Considerations in AI, Bias Mitigation..."
3,Stakeholder Engagement and Collaboration,This group focuses on the importance of engagi...,"[Stakeholder Engagement, Community Engagement ..."
4,Education and Awareness,This group highlights the significance of educ...,"[Training for Workforce, Consumer Education on..."
5,'High-Risk' AI Regulations,'High-Risk' regulations are focused on ensurin...,"['High-Risk' AI Regulations, 'High-Stakes Deci..."
6,'Risk-Based' Approaches to Regulation,'Risk-Based' approaches advocate for tailored ...,"['Risk-Based Approach Advocacy', 'Risk-Based T..."
7,'International Standards' Alignment,'International Standards' alignment discusses ...,"['Global Coordination for Standards', 'Interna..."


In [None]:
# Grouping run 3 of 3. In the recorded output of this run "codes" came back as
# one comma-separated string per theme rather than a list.
# NOTE(review): the name result_4 is rebound in Step 4 to the critique text, so
# data4 must be parsed here, before that cell runs.
result_4 = get_completion(system_text, user_text_2, response_format_2)
data4 = json.loads(result_4)
df_theme4 = pd.DataFrame(data4["Themes"])
df_theme4

ChatCompletion(id='chatcmpl-BHfTh6DgiJ3HoCRdhzkhBF09TRykR', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "Themes": [\n    {\n      "theme_name": "AI Transparency and Importance",\n      "description": "This theme encompasses the critical role of transparency in AI systems, emphasizing the need for clear disclosure, accountability, and ethical considerations.",\n      "codes": "Importance of Transparency, AI Transparency Importance, Risk-Based Transparency, Importance of Disclosure"\n    },\n    {\n      "theme_name": "Governance and Regulation",\n      "description": "This theme focuses on the governance frameworks necessary for managing AI technologies responsibly, including regulatory recommendations and coordination across sectors.",\n      "codes": "Need for Robust Governance, Regulatory Frameworks Development, Centralized Governance Coordination, Regulatory Framework Recommendations, Regulatory Sandbox Proposal"\n    },\

Unnamed: 0,theme_name,description,codes
0,AI Transparency and Importance,This theme encompasses the critical role of tr...,"Importance of Transparency, AI Transparency Im..."
1,Governance and Regulation,This theme focuses on the governance framework...,"Need for Robust Governance, Regulatory Framewo..."
2,Ethics and Responsibility in AI,This theme highlights the ethical implications...,"Ethical Considerations in AI, Commitment to Re..."
3,Risk Management and Mitigation,This theme addresses the necessity for risk as...,"Risk-Based Approach Advocacy, Risk Mitigation ..."
4,Stakeholder Engagement and Collaboration,This theme emphasizes the need for engagement ...,"Stakeholder Engagement, Community Engagement i..."
5,Education and Awareness,This theme focuses on the importance of educat...,"Training for Workforce, Consumer Education on ..."
6,Transparency Tools and Techniques,This theme covers various tools and methods pr...,"Transparency Tools for Users, Open Access to A..."
7,Inclusivity and Fairness,This theme focuses on ensuring inclusivity in ...,"Inclusivity in AI Applications, Fairness and I..."


In [None]:
import pandas as pd

# Explode grouping run 1 into one row per (code, theme) pair.
expanded_rows = []

for index, row in df_theme2.iterrows():
    codes = row['codes']
    # "codes" has been returned both as a list and as a single comma-separated
    # string. Normalise to a list so a string is never iterated character by
    # character.
    if isinstance(codes, str):
        codes = [part.strip() for part in codes.split(',') if part.strip()]
    for code in codes:
        expanded_rows.append({'code': code, 'theme': row['theme_name'], 'description': row['description']})

# Build the frame once instead of concatenating inside the loop; columns=
# keeps the layout even when there are no rows.
df_expanded = pd.DataFrame(expanded_rows, columns=['code', 'theme', 'description'])

df_expanded

Unnamed: 0,code,theme,description
0,Importance of Transparency,AI Transparency and Accountability,This group focuses on the importance of transp...
1,AI Transparency Importance,AI Transparency and Accountability,This group focuses on the importance of transp...
2,Ongoing Accountability,AI Transparency and Accountability,This group focuses on the importance of transp...
3,Risk-Based Transparency,AI Transparency and Accountability,This group focuses on the importance of transp...
4,Importance of Disclosure,AI Transparency and Accountability,This group focuses on the importance of transp...
5,Explainability of AI Systems,AI Transparency and Accountability,This group focuses on the importance of transp...
6,Algorithmic Accountability Needs,AI Transparency and Accountability,This group focuses on the importance of transp...
7,Need for Robust Governance,Governance and Regulation Frameworks,This theme encompasses the need for robust gov...
8,Regulatory Frameworks Development,Governance and Regulation Frameworks,This theme encompasses the need for robust gov...
9,Regulatory Framework Recommendations,Governance and Regulation Frameworks,This theme encompasses the need for robust gov...


In [None]:
# Explode grouping run 2 into one row per (code, theme) pair.
expanded_rows2 = []

for index, row in df_theme3.iterrows():
    codes = row['codes']
    # "codes" has been returned both as a list and as a single comma-separated
    # string. Normalise to a list so a string is never iterated character by
    # character.
    if isinstance(codes, str):
        codes = [part.strip() for part in codes.split(',') if part.strip()]
    for code in codes:
        expanded_rows2.append({'code': code, 'theme': row['theme_name'], 'description': row['description']})

# Build the frame once instead of concatenating inside the loop; columns=
# keeps the layout even when there are no rows.
df_expanded2 = pd.DataFrame(expanded_rows2, columns=['code', 'theme', 'description'])

df_expanded2


Unnamed: 0,code,theme,description
0,Importance of Transparency,Importance of Transparency,This group highlights the critical role of tra...
1,AI Transparency Importance,Importance of Transparency,This group highlights the critical role of tra...
2,Risk Mitigation through Transparency,Importance of Transparency,This group highlights the critical role of tra...
3,Importance of Disclosure,Importance of Transparency,This group highlights the critical role of tra...
4,Explainability of AI Systems,Importance of Transparency,This group highlights the critical role of tra...
5,Need for Robust Governance,Governance and Regulation,This group emphasizes the need for robust gove...
6,Regulatory Frameworks Development,Governance and Regulation,This group emphasizes the need for robust gove...
7,Centralized Governance Coordination,Governance and Regulation,This group emphasizes the need for robust gove...
8,Regulatory Framework Recommendations,Governance and Regulation,This group emphasizes the need for robust gove...
9,Risk-Based Regulation Approach,Governance and Regulation,This group emphasizes the need for robust gove...


In [None]:
# Explode grouping run 3 into one row per (code, theme) pair.
expanded_rows3 = []

for index, row in df_theme4.iterrows():
    codes = row['codes']
    # "codes" has been returned both as a single comma-separated string and as
    # a list. Only split when it is a string, so a list no longer fails on
    # .split().
    if isinstance(codes, str):
        codes = [part.strip() for part in codes.split(',') if part.strip()]
    for code in codes:
        expanded_rows3.append({'code': code, 'theme': row['theme_name'], 'description': row['description']})

# Build the frame once instead of concatenating inside the loop; columns=
# keeps the layout even when there are no rows.
df_expanded3 = pd.DataFrame(expanded_rows3, columns=['code', 'theme', 'description'])

df_expanded3

Unnamed: 0,code,theme,description
0,Importance of Transparency,AI Transparency and Importance,This theme encompasses the critical role of tr...
1,AI Transparency Importance,AI Transparency and Importance,This theme encompasses the critical role of tr...
2,Risk-Based Transparency,AI Transparency and Importance,This theme encompasses the critical role of tr...
3,Importance of Disclosure,AI Transparency and Importance,This theme encompasses the critical role of tr...
4,Need for Robust Governance,Governance and Regulation,This theme focuses on the governance framework...
5,Regulatory Frameworks Development,Governance and Regulation,This theme focuses on the governance framework...
6,Centralized Governance Coordination,Governance and Regulation,This theme focuses on the governance framework...
7,Regulatory Framework Recommendations,Governance and Regulation,This theme focuses on the governance framework...
8,Regulatory Sandbox Proposal,Governance and Regulation,This theme focuses on the governance framework...
9,Ethical Considerations in AI,Ethics and Responsibility in AI,This theme highlights the ethical implications...


In [None]:
# Write grouping run 3 to Drive. Emptiness is checked as well as None, and the
# fallback message names the frame this cell actually exports.
if df_theme4 is not None and not df_theme4.empty:
    excel_filepath = "/content/drive/MyDrive/themes_8_docs_3.xlsx"  # Replace with desired path
    df_theme4.to_excel(excel_filepath, index=False)  # Set index=False to avoid writing row indices
    print(f"DataFrame exported to: {excel_filepath}")
else:
    print("DataFrame 'df_theme4' is empty or None. Cannot export.")

DataFrame exported to: /content/drive/MyDrive/themes_8_docs_3.xlsx


In [None]:
# Write the expanded (code, theme, description) table of grouping run 1 to
# Drive. Emptiness is checked as well as None, and the fallback message names
# the frame this cell actually exports.
if df_expanded is not None and not df_expanded.empty:
    excel_filepath = "/content/drive/MyDrive/themes_8_docs_expanded_2.xlsx"  # Replace with desired path
    df_expanded.to_excel(excel_filepath, index=False)  # Set index=False to avoid writing row indices
    print(f"DataFrame exported to: {excel_filepath}")
else:
    print("DataFrame 'df_expanded' is empty or None. Cannot export.")

DataFrame exported to: /content/drive/MyDrive/themes_8_docs_expanded_2.xlsx


In [None]:
# Load the three expanded groupings from Drive and rebind df_theme2/3/4 to
# them; Step 4 reads the 'theme' column of these frames.
# NOTE(review): only themes_8_docs_expanded_2.xlsx is written by a cell visible
# in this notebook; the other two files are presumably produced elsewhere.
# Confirm they exist before a fresh Run All.
# NOTE(review): after this cell df_theme2/3/4 no longer hold the raw grouping
# frames, so re-running the expansion cells above would presumably fail on the
# missing 'codes' column. Verify before re-running out of order.
excel_filepath2 = "/content/drive/MyDrive/themes_8_docs_expanded_2.xlsx"
excel_filepath3 = "/content/drive/MyDrive/themes_8_docs_2_expanded_2.xlsx"
excel_filepath4 = "/content/drive/MyDrive/themes_8_docs_3_expanded_2.xlsx"
df_theme2 = pd.read_excel(excel_filepath2)
df_theme3 = pd.read_excel(excel_filepath3)
df_theme4 = pd.read_excel(excel_filepath4)

In [None]:
#Step 4

# Collapse each candidate grouping into a comma-separated string of its
# distinct theme names, one string per option.
formatted_themes1 = ', '.join(df_theme2['theme'].unique())
formatted_themes2 = ', '.join(df_theme3['theme'].unique())
formatted_themes3 = ', '.join(df_theme4['theme'].unique())

# Critique prompt: shows the codes and the three candidate groupings and asks
# the model to list weaknesses of each option.
user_text_4 = f"""
Given the following topics:
\"\"\"\n{formatted_codes}\n\"\"\"

Determine how all the topics in the list of topics can be grouped together.
Topics can be in more than one group. Provide a name and description for each group, followed by all the topics in the group.

Here are the three groupings of topics proposed:
\"\"\"\nOption 1:{formatted_themes1}\n\"\"\"
\"\"\"\nOption 2:{formatted_themes2}\n\"\"\"
\"\"\"\nOption 3:{formatted_themes3}\n\"\"\"

List the areas for refinement and faulty logic of each answer option. Let’s work this out in a step by step way to be sure we have all the errors:
"""

# The critique is free text, so no JSON schema is requested here.
response_format_4 = {"type": "text"}


In [None]:
# Request the free-text critique of the three groupings.
# NOTE(review): this rebinds result_4, which earlier held the JSON of grouping
# run 3 (already parsed into data4).
result_4 = get_completion(system_text, user_text_4, response_format_4)

In [None]:
# Show the raw critique string returned by the model.
result_4

'Let\'s analyze each of the proposed options step by step to identify areas for refinement and any faulty logic. \n\n### Option 1:\n**Groups:**\n1. **AI Transparency and Accountability**\n2. **Governance and Regulation Frameworks**\n3. **Ethical Considerations and Social Impact**\n4. **Stakeholder Engagement and Collaboration**\n5. **Education and Public Awareness**\n6. **\'Risk-Based\' Approach to AI Management**\n7. **\'Transparency Tools\' for Users and Public Trust Initiatives**\n\n**Areas for Refinement/Faulty Logic:**\n- The grouping "AI Transparency and Accountability" includes both transparency topics and accountability, but it may lack specificity regarding how these two concepts interact.\n- "Governance and Regulation Frameworks" could be more explicit in differentiating between various regulatory approaches or frameworks (e.g., risk-based vs. centralized).\n- The term "Ethical Considerations and Social Impact" could be refined to ensure it captures specific ethical dilemmas 

In [None]:
# Hard-coded critique text that replaces whatever result_4 held before; it
# appears to be a pasted copy of the model output displayed above, so Step 5
# works from a fixed assessment. The f-string prefix has no placeholders to
# fill here.
result_4 = f"""
Let's analyze each of the proposed options step by step to identify areas for refinement and any faulty logic.

### Option 1:
**Groups:**
1. **AI Transparency and Accountability**
2. **Governance and Regulation Frameworks**
3. **Ethical Considerations and Social Impact**
4. **Stakeholder Engagement and Collaboration**
5. **Education and Public Awareness**
6. **'Risk-Based' Approach to AI Management**
7. **'Transparency Tools' for Users and Public Trust Initiatives**

**Areas for Refinement/Faulty Logic:**
- The grouping "AI Transparency and Accountability" includes both transparency topics and accountability, but it may lack specificity regarding how these two concepts interact.
- "Governance and Regulation Frameworks" could be more explicit in differentiating between various regulatory approaches or frameworks (e.g., risk-based vs. centralized).
- The term "Ethical Considerations and Social Impact" could be refined to ensure it captures specific ethical dilemmas versus general social impact.
- "Education and Public Awareness" could be further broken down into different types of education (e.g., workforce training vs. consumer education).
- The inclusion of both "Risk-Based Approach to AI Management" and "'Risk-Based' Approach to Regulation" may lead to redundancy as they seem to convey similar ideas.
- "Transparency Tools for Users and Public Trust Initiatives" could be more focused; combining these two distinct concepts may dilute their individual importance.

### Option 2:
**Groups:**
1. **Importance of Transparency**
2. **Governance and Regulation**
3. **Ethical Considerations in AI**
4. **Stakeholder Engagement and Collaboration**
5. **Education and Awareness**
6. **'High-Risk' AI Regulations**
7. **'Risk-Based' Approaches to Regulation**
8. **'International Standards' Alignment**

**Areas for Refinement/Faulty Logic:**
- The group "Importance of Transparency" seems somewhat vague; it might benefit from specifying what aspects of transparency are being prioritized.
- "Governance and Regulation" could be broken down further into specific frameworks or mechanisms, such as centralized governance or self-regulation.
- Including both "High-Risk AI Regulations" and "'Risk-Based' Approaches to Regulation" raises concerns about redundancy, as both address risk management but from different perspectives.
- The term "International Standards Alignment" might not directly correlate with the other groups unless there is a clearer link to how it affects governance or ethical considerations.

### Option 3:
**Groups:**
1. **AI Transparency and Importance**
2. **Governance and Regulation**
3. **Ethics and Responsibility in AI**
4. **Risk Management and Mitigation**
5. **Stakeholder Engagement and Collaboration**
6. **Education and Awareness**
7. **Transparency Tools and Techniques**
8. **Inclusivity and Fairness**

**Areas for Refinement/Faulty Logic:**
- The title "AI Transparency and Importance" could be confusing; it's unclear if it focuses on the importance of transparency itself or transparency's role within AI.
- The term "Governance and Regulation" is broad; more specific categories might help clarify different regulatory approaches (e.g., self-regulation vs government mandates).
- "Ethics and Responsibility in AI" might benefit from further elaboration on how responsibility translates into practical guidelines or actions.
- While “Risk Management” is crucial, the connection between this group and others like “Inclusivity” may not be explicit, requiring further explanation on how inclusivity relates to risk management.
- Including both “Transparency Tools” as well as general references to transparency in multiple groups may create overlap without clear distinctions.

### Summary
Each option has strengths but also presents opportunities for refinement in clarity, specificity, differentiation, redundancy, or potential overlaps among groups that could confuse the relationships between concepts related to AI transparency, governance, ethics, stakeholder engagement, education, regulation, risk management, inclusivity, fairness, etc.

In refining these options further, it's important to consider clearer definitions for each group while minimizing redundancy between them—ensuring that each group's content adds distinct value to the overall understanding of the topics related to mandating AI transparency in organizations.
"""

In [None]:
#Step 5

# Resolver prompt: repeats the codes and the three candidate groupings, adds
# the Step 4 critique, and asks for one improved grouping as JSON under the key
# "Themes".
# The assessment section is closed with the same triple-quote delimiter as the
# other quoted sections, so the critique text no longer runs straight into the
# resolver instructions.
user_text_5 = f"""
Given the following topics:
\"\"\"\n{formatted_codes}\n\"\"\"

Determine how all the topics in the list of topics can be grouped together.
Topics can be in more than one group. Provide a name and description for each group, followed by all the topics in the group.

Here are the three groupings of topics proposed:
\"\"\"\nOption 1:{formatted_themes1}\n\"\"\"
\"\"\"\nOption 2:{formatted_themes2}\n\"\"\"
\"\"\"\nOption 3:{formatted_themes3}\n\"\"\"

Here is the assessment of the three groupings:
\"\"\"\n{result_4}\n\"\"\"

You are a resolver tasked with finding the answers that best determines how all the topics in the list of topics can be grouped together.
1) removing any redundant or duplicate answers.
2) improving the answers based on the analysis of flaws
3) printing the improved answer in full
Let’s work this one out in a step by step way:

Format the response in a JSON format with "theme_name", "description", and "codes" under the key "Themes".
"""

In [None]:
# Run the resolver prompt, reusing the grouping schema (response_format_2) so
# the reply is JSON under the key "Themes".
result_5 = get_completion(system_text, user_text_5, response_format_2)

ChatCompletion(id='chatcmpl-BJuiloCEv6982Elj6gWkz2qOLE7Ku', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "Themes": [\n    {\n      "theme_name": "AI Transparency and Accountability",\n      "description": "Focuses on the significance of transparency in AI systems and the accountability mechanisms that ensure responsible AI usage.",\n      "codes": [\n        "Importance of Transparency",\n        "AI Transparency Importance",\n        "Transparency Tools for Users",\n        "Explainability of AI Systems",\n        "Transparent Disclosure Obligations"\n      ]\n    },\n    {\n      "theme_name": "Governance and Regulation Frameworks",\n      "description": "Addresses the structures, frameworks, and approaches to regulate AI technologies, including the need for clear definitions and consistency in regulations.",\n      "codes": [\n        "Governance and Regulation",\n        "Regulatory Frameworks Development",\n        "\'Hi

NameError: name 'json' is not defined

In [None]:
# NOTE(review): a NameError for `json` is recorded just above this cell, so this
# import appears to be the first point where json is available; consider moving
# it to the notebook's first import cell so a fresh Run All works.
import json

# Decode the JSON text held in result_5 into plain Python objects.
data5 = json.loads(result_5)

# Last expression of the cell: the notebook displays the parsed structure.
data5

{'Themes': [{'theme_name': 'AI Transparency and Accountability',
   'description': 'Focuses on the significance of transparency in AI systems and the accountability mechanisms that ensure responsible AI usage.',
   'codes': ['Importance of Transparency',
    'AI Transparency Importance',
    'Transparency Tools for Users',
    'Explainability of AI Systems',
    'Transparent Disclosure Obligations']},
  {'theme_name': 'Governance and Regulation Frameworks',
   'description': 'Addresses the structures, frameworks, and approaches to regulate AI technologies, including the need for clear definitions and consistency in regulations.',
   'codes': ['Governance and Regulation',
    'Regulatory Frameworks Development',
    "'High-Risk' AI Regulations",
    "'Risk-Based' Approaches to Regulation",
    'Centralized Governance Coordination']},
  {'theme_name': 'Ethical Considerations in AI',
   'description': 'Explores ethical dilemmas associated with AI deployment, including data ethics, bias mi

In [None]:
# One row per theme: each entry under "Themes" is a record whose keys
# (theme_name, description, codes) become the columns of the table.
df_themefinal = pd.DataFrame.from_records(data5["Themes"])
df_themefinal

Unnamed: 0,theme_name,description,codes
0,AI Transparency and Accountability,Focuses on the significance of transparency in...,"[Importance of Transparency, AI Transparency I..."
1,Governance and Regulation Frameworks,"Addresses the structures, frameworks, and appr...","[Governance and Regulation, Regulatory Framewo..."
2,Ethical Considerations in AI,Explores ethical dilemmas associated with AI d...,"[Ethical Considerations in AI, Bias Mitigation..."
3,Stakeholder Engagement and Collaboration,Emphasizes the importance of engaging various ...,"[Stakeholder Engagement, Collaboration with Go..."
4,Education and Public Awareness,Highlights the necessity of educating both org...,"[Education and Public Awareness, AI Literacy E..."
5,'Risk-Based' Approach to AI Management,'Risk-Based' approaches advocate for proportio...,"['Risk-Based' Approach Advocacy, 'Risk-Based' ..."
6,'Inclusivity and Fairness','Inclusivity and Fairness' refers to ensuring ...,"['Inclusivity in AI Applications', 'Fairness a..."


In [None]:
# Expand the theme table to one row per code, so every code carries the name
# and description of the theme it was grouped under.
# The rows are gathered in a list and the frame is built once; growing a
# DataFrame with pd.concat inside the loop re-copies every accumulated row on
# each iteration.
expanded_records = [
    {'code': code, 'theme': theme_name, 'description': description}
    for theme_name, description, codes in zip(
        df_themefinal['theme_name'],
        df_themefinal['description'],
        df_themefinal['codes'],
    )
    for code in codes  # each 'codes' cell holds the list of codes for that theme
]

# Passing columns explicitly keeps the three-column layout even when there are no rows.
df_expandedfinal = pd.DataFrame(expanded_records, columns=['code', 'theme', 'description'])

df_expandedfinal

Unnamed: 0,code,theme,description
0,Importance of Transparency,AI Transparency and Accountability,Focuses on the significance of transparency in...
1,AI Transparency Importance,AI Transparency and Accountability,Focuses on the significance of transparency in...
2,Transparency Tools for Users,AI Transparency and Accountability,Focuses on the significance of transparency in...
3,Explainability of AI Systems,AI Transparency and Accountability,Focuses on the significance of transparency in...
4,Transparent Disclosure Obligations,AI Transparency and Accountability,Focuses on the significance of transparency in...
5,Governance and Regulation,Governance and Regulation Frameworks,"Addresses the structures, frameworks, and appr..."
6,Regulatory Frameworks Development,Governance and Regulation Frameworks,"Addresses the structures, frameworks, and appr..."
7,'High-Risk' AI Regulations,Governance and Regulation Frameworks,"Addresses the structures, frameworks, and appr..."
8,'Risk-Based' Approaches to Regulation,Governance and Regulation Frameworks,"Addresses the structures, frameworks, and appr..."
9,Centralized Governance Coordination,Governance and Regulation Frameworks,"Addresses the structures, frameworks, and appr..."


In [None]:
# Write the expanded code/theme table to an Excel workbook on the mounted Drive.
# The guard covers both a missing frame and a frame with no rows, so the
# fallback message below is accurate and an empty workbook is never written.
# NOTE(review): excel_filepath rebinds the name the notebook's first cells use
# for the input workbook; re-running cells out of order will see this path.
if df_expandedfinal is not None and not df_expandedfinal.empty:
    excel_filepath = "/content/drive/MyDrive/themes_8_docs_expanded_final.xlsx"  # Replace with desired path
    df_expandedfinal.to_excel(excel_filepath, index=False)  # index=False: do not write the row index as a column
    print(f"DataFrame exported to: {excel_filepath}")
else:
    print("DataFrame 'df_expandedfinal' is empty or None. Cannot export.")

DataFrame exported to: /content/drive/MyDrive/themes_8_docs_expanded_final.xlsx
