Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Integration]: GitLab Integration #5768

Merged
merged 6 commits into from
May 3, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 48 additions & 0 deletions mindsdb/integrations/handlers/gitlab_handler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# GitLab Handler

This is the implementation of the GitLab handler for MindsDB. It connects to the GitLab API and pulls data into MindsDB.

## GitLab Handler Implementation

This handler was implemented using the [python-gitlab](https://github.com/python-gitlab/python-gitlab) library.
python-gitlab is a Python library that wraps GitLab API.

## GitLab Handler Initialization

The GitLab handler is initialized with the following parameters:

- `repository`: a required name of a GitLab repository to connect to
- `api_key`: an optional GitLab API key to use for authentication

## Implemented Features

- GitLab Issues table for a given repository (supports LIMIT, WHERE, ORDER BY and SELECT with a column list)

## Usage
To use this handler and connect to the GitLab API from MindsDB, use the following syntax:

~~~~sql
CREATE DATABASE mindsdb_gitlab
WITH ENGINE = 'gitlab',
PARAMETERS = {
"repository": "gitlab-org/gitlab",
"api_key": "api_key" -- optional GitLab API key
};
~~~~

Now, you can use this established connection to query your table as follows,
~~~~sql
SELECT * FROM mindsdb_gitlab.issues
~~~~

~~~~sql
SELECT number, state, creator, assignees, title, labels
FROM mindsdb_gitlab.issues
WHERE state="open"
ORDER BY created ASC, creator DESC
LIMIT 10
~~~~

## What's next?
- GitLab Merge Requests table for a given Repository
- GitLab ....
9 changes: 9 additions & 0 deletions mindsdb/integrations/handlers/gitlab_handler/__about__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Package metadata for the MindsDB GitLab handler.
# `__version__` and `__description__` are re-exported by the handler
# package's __init__.py as `version` and `description`.
__title__ = "MindsDB GitLab handler"
__package_name__ = "mindsdb_gitlab_handler"
__version__ = "0.0.1"
__description__ = "MindsDB handler for GitLab"
__author__ = "Senali Dilumika"
__github__ = "https://github.com/mindsdb/mindsdb"
__pypi__ = "https://pypi.org/project/mindsdb/"
__license__ = "GPL-3.0"
__copyright__ = "Copyright 2023 - mindsdb"
19 changes: 19 additions & 0 deletions mindsdb/integrations/handlers/gitlab_handler/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from mindsdb.integrations.libs.const import HANDLER_TYPE

from .__about__ import __version__ as version, __description__ as description
# Import the handler defensively: if the import fails for any reason the
# package still loads, exposing the failure through `import_error`.
try:
    from .gitlab_handler import GitlabHandler as Handler
except Exception as e:
    Handler = None
    import_error = e
else:
    import_error = None

# Registration metadata exported by this package.
name = "gitlab"
title = "GitLab"
type = HANDLER_TYPE.DATA
icon_path = "icon.svg"

__all__ = [
    "Handler",
    "version",
    "name",
    "type",
    "title",
    "description",
    "import_error",
    "icon_path",
]
83 changes: 83 additions & 0 deletions mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import gitlab

from mindsdb.integrations.handlers.gitlab_handler.gitlab_tables import GitlabIssuesTable
from mindsdb.integrations.libs.api_handler import APIHandler
from mindsdb.integrations.libs.response import (
HandlerStatusResponse as StatusResponse,
)

from mindsdb.utilities.log import get_log
from mindsdb_sql import parse_sql

logger = get_log("integrations.gitlab_handler")


class GitlabHandler(APIHandler):
    """The GitLab handler implementation.

    Wraps a python-gitlab client and registers the tables that can be
    queried through it (currently only ``issues``).
    """

    def __init__(self, name: str, **kwargs):
        """Create the handler and register its tables.

        Args:
            name (str): the handler name
            **kwargs: handler arguments; ``connection_data`` is expected to
                be a dict holding the required ``repository`` and the
                optional ``api_key``.

        Raises:
            KeyError: if ``connection_data`` has no ``repository`` entry.
        """
        super().__init__(name)

        # Tolerate an explicit ``connection_data=None`` as well as a missing key.
        connection_data = kwargs.get("connection_data") or {}
        self.connection_data = connection_data
        self.repository = connection_data["repository"]
        self.kwargs = kwargs

        self.connection = None
        self.is_connected = False

        gitlab_issues_data = GitlabIssuesTable(self)
        self._register_table("issues", gitlab_issues_data)

    def connect(self) -> "gitlab.Gitlab":
        """Set up the connection required by the handler.

        The client is created once and reused while ``is_connected`` is
        true, so repeated calls (one per SELECT) do not rebuild it.

        Returns:
            gitlab.Gitlab: the python-gitlab client.
        """
        if self.is_connected and self.connection is not None:
            return self.connection

        connection_kwargs = {}

        api_key = self.connection_data.get("api_key")
        if api_key:
            connection_kwargs["private_token"] = api_key

        self.connection = gitlab.Gitlab(**connection_kwargs)
        self.is_connected = True

        return self.connection

    def check_connection(self) -> StatusResponse:
        """Check connection to the handler.

        When an API key is configured the token is validated against GitLab
        with ``Gitlab.auth()``, so an invalid key is reported as a failure
        instead of being discovered on the first query.

        Returns:
            HandlerStatusResponse: ``success`` is True when the client could
            be set up (and authenticated, if a key was given); otherwise
            ``error_message`` holds the failure text.
        """
        response = StatusResponse(False)

        try:
            connection = self.connect()
            if self.connection_data.get("api_key"):
                # auth() fetches the current user, which proves the token works.
                connection.auth()
                logger.info(f"Authenticated as user {connection.user.username}")
            else:
                logger.info("Proceeding without an API key")

            response.success = True
        except Exception as e:
            logger.error(f"Error connecting to GitLab API: {e}!")
            # Store text rather than the exception object.
            response.error_message = str(e)

        # A failed check forces the next connect() to rebuild the client.
        self.is_connected = response.success

        return response

    def native_query(self, query: str) -> StatusResponse:
        """Receive and process raw query.

        Args:
            query (str): query in a native format

        Returns:
            Whatever ``self.query`` returns for the parsed statement.
            NOTE(review): the annotation says StatusResponse, but this is
            presumably a HandlerResponse - confirm against APIHandler.query.
        """
        ast = parse_sql(query, dialect="mindsdb")
        return self.query(ast)
191 changes: 191 additions & 0 deletions mindsdb/integrations/handlers/gitlab_handler/gitlab_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
import pandas as pd

from typing import List

from mindsdb.integrations.libs.api_handler import APITable
from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions
from mindsdb.utilities.log import get_log

from mindsdb_sql.parser import ast

logger = get_log("integrations.gitlab_handler")

class GitlabIssuesTable(APITable):
    """The GitLab Issue Table implementation"""

    # Number of rows returned when the query has no LIMIT clause.
    _DEFAULT_LIMIT = 20
    # Largest page size requested from GitLab in a single call.
    _MAX_PAGE_SIZE = 100
    # Table name accepted as a qualifier in ORDER BY identifiers.
    _TABLE_NAME = "issues"
    # Columns GitLab can sort by itself, mapped to its ``order_by`` values.
    _SERVER_SORT_FIELDS = {"created": "created_at", "updated": "updated_at"}
    # Accepted WHERE values for ``state`` mapped to GitLab's vocabulary
    # (GitLab says "opened" where the README/GitHub style says "open").
    _STATE_TO_API = {
        "open": "opened",
        "opened": "opened",
        "closed": "closed",
        "all": "all",
    }
    # WHERE columns mapped to the GitLab list-issues filter parameters.
    # NOTE(review): these filters match on *username*, while the ``creator``
    # and ``assignees`` output columns hold display names - confirm this is
    # the intended user-facing behaviour.
    _USER_FILTER_TO_API = {
        "creator": "author_username",
        "assignee": "assignee_username",
    }

    def select(self, query: ast.Select) -> pd.DataFrame:
        """Pulls data from the GitLab "List project issues" API.

        WHERE filters and a ``created``/``updated`` ordering are pushed down
        to GitLab; the full ORDER BY is then applied client-side on the
        fetched rows, before the requested columns are projected.

        Args:
            query: SELECT

        Returns:
            DataFrame with the selected columns, at most LIMIT rows
            (20 when no LIMIT is given).

        Raises:
            ValueError: on unsupported WHERE / ORDER BY clauses or targets.
        """
        conditions = extract_comparison_conditions(query.where)

        total_results = query.limit.value if query.limit else self._DEFAULT_LIMIT

        issues_kwargs, sort_columns, sort_ascending = self._parse_order_by(
            query.order_by
        )
        issues_kwargs.update(self._parse_where(conditions))

        selected_columns = []
        for target in query.targets:
            if isinstance(target, ast.Star):
                selected_columns = self.get_columns()
                break
            elif isinstance(target, ast.Identifier):
                selected_columns.append(target.parts[-1])
            else:
                raise ValueError(f"Unknown query target {type(target)}")

        self.handler.connect()
        rows = self._fetch_issues(issues_kwargs, total_results)

        if not rows:
            return pd.DataFrame([], columns=selected_columns)

        # Build the frame once instead of concatenating one row at a time.
        gitlab_issues_df = pd.DataFrame(rows, columns=self.get_columns())

        # Sort before projecting so ORDER BY may use a column that is not
        # part of the SELECT list.
        if sort_columns:
            gitlab_issues_df = gitlab_issues_df.sort_values(
                by=sort_columns,
                ascending=sort_ascending,
            )

        # Keep the canonical column order; unknown targets are ignored.
        kept_columns = [
            col for col in gitlab_issues_df.columns if col in selected_columns
        ]
        return gitlab_issues_df[kept_columns]

    def _parse_order_by(self, order_by) -> tuple:
        """Translate ORDER BY into GitLab kwargs plus a client-side sort spec.

        Returns:
            (api_kwargs, columns, ascending) where ``api_kwargs`` holds the
            ``order_by``/``sort`` parameters for GitLab (if any) and
            ``columns``/``ascending`` are parallel lists for ``sort_values``.

        Raises:
            ValueError: for unknown columns or more than one
                created/updated ordering.
        """
        api_kwargs = {}
        columns = []
        ascending = []
        known_columns = self.get_columns()

        for an_order in order_by or []:
            parts = an_order.field.parts
            # Skip identifiers qualified with another table. Unqualified
            # identifiers (a single part) belong to this table.
            if len(parts) > 1 and parts[-2] != self._TABLE_NAME:
                continue

            column = parts[-1]
            # SQL orders ascending unless DESC is requested explicitly.
            is_ascending = str(an_order.direction).upper() != "DESC"

            if column in self._SERVER_SORT_FIELDS:
                if "order_by" in api_kwargs:
                    raise ValueError(
                        "Duplicate order conditions found for created/updated"
                    )
                api_kwargs["order_by"] = self._SERVER_SORT_FIELDS[column]
                api_kwargs["sort"] = "asc" if is_ascending else "desc"
            elif column not in known_columns:
                raise ValueError(f"Order by unknown column {column}")

            columns.append(column)
            ascending.append(is_ascending)

        return api_kwargs, columns, ascending

    def _parse_where(self, conditions) -> dict:
        """Translate WHERE comparisons into GitLab list-issues kwargs.

        Each condition is read as ``[operator, column, value]``.

        Raises:
            ValueError: for unsupported columns, operators or state values.
        """
        api_kwargs = {}

        for a_where in conditions:
            operator, column, value = a_where[0], a_where[1], a_where[2]

            if column == "state":
                if operator != "=":
                    raise ValueError("Unsupported where operation for state")
                if value not in self._STATE_TO_API:
                    raise ValueError(
                        f"Unsupported where argument for state {value}"
                    )
                api_kwargs["state"] = self._STATE_TO_API[value]
            elif column == "labels":
                if operator != "=":
                    raise ValueError("Unsupported where operation for labels")
                api_kwargs["labels"] = value.split(",")
            elif column in self._USER_FILTER_TO_API:
                if operator != "=":
                    raise ValueError(f"Unsupported where operation for {column}")
                api_kwargs[self._USER_FILTER_TO_API[column]] = value
            else:
                raise ValueError(f"Unsupported where argument {column}")

        return api_kwargs

    def _fetch_issues(self, issues_kwargs: dict, total_results: int) -> List[dict]:
        """Fetch up to ``total_results`` issues, following pagination."""
        rows = []
        if total_results <= 0:
            return rows

        project = self.handler.connection.projects.get(self.handler.repository)
        per_page = min(total_results, self._MAX_PAGE_SIZE)
        page = 1

        while len(rows) < total_results:
            issues = project.issues.list(
                page=page, per_page=per_page, **issues_kwargs
            )
            for issue in issues:
                logger.debug(f"Processing issue {issue.iid}")
                rows.append(self._issue_to_row(issue))
                if len(rows) >= total_results:
                    break

            # A short page means there is nothing left to fetch.
            if len(issues) < per_page:
                break
            page += 1

        return rows

    @staticmethod
    def _issue_to_row(issue) -> dict:
        """Flatten one python-gitlab issue object into a row dict."""
        # ``issue.closed_by`` resolves to python-gitlab's
        # ``ProjectIssue.closed_by()`` method, so the API field has to be
        # read from the raw attributes instead.
        closed_by = issue.attributes.get("closed_by")

        return {
            "number": issue.iid,
            "title": issue.title,
            "state": issue.state,
            "creator": issue.author["name"],
            "closed_by": closed_by.get("name")
            if isinstance(closed_by, dict)
            else None,
            "labels": ",".join(issue.labels),
            "assignees": ",".join(
                assignee["name"] for assignee in issue.assignees
            ),
            "body": issue.description,
            "created": issue.created_at,
            "updated": issue.updated_at,
            "closed": issue.closed_at,
        }

    def get_columns(self) -> List[str]:
        """Gets all columns to be returned in pandas DataFrame responses
        Returns
        -------
        List[str]: list of columns
        """

        return [
            "number",
            "title",
            "state",
            "creator",
            "closed_by",
            "labels",
            "assignees",
            "body",
            "created",
            "updated",
            "closed",
        ]
1 change: 1 addition & 0 deletions mindsdb/integrations/handlers/gitlab_handler/icon.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python-gitlab